Repository: facebookresearch/Detectron
Branch: main
Commit: 04155a01a6ea
Files: 210
Total size: 1008.7 KB

Directory structure:
gitextract_5vn9xfb_/

├── .github/
│   └── issue_template.md
├── .gitignore
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── FAQ.md
├── GETTING_STARTED.md
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── Makefile
├── NOTICE
├── README.md
├── cmake/
│   ├── Summary.cmake
│   └── legacy/
│       ├── Cuda.cmake
│       ├── Dependencies.cmake
│       ├── Modules/
│       │   └── FindCuDNN.cmake
│       ├── Summary.cmake
│       ├── Utils.cmake
│       └── legacymake.cmake
├── configs/
│   ├── 04_2018_gn_baselines/
│   │   ├── e2e_mask_rcnn_R-101-FPN_2x_gn.yaml
│   │   ├── e2e_mask_rcnn_R-101-FPN_3x_gn.yaml
│   │   ├── e2e_mask_rcnn_R-50-FPN_2x_gn.yaml
│   │   ├── e2e_mask_rcnn_R-50-FPN_3x_gn.yaml
│   │   ├── mask_rcnn_R-50-FPN_1x_gn.yaml
│   │   ├── scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml
│   │   └── scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml
│   ├── 12_2017_baselines/
│   │   ├── e2e_faster_rcnn_R-101-FPN_1x.yaml
│   │   ├── e2e_faster_rcnn_R-101-FPN_2x.yaml
│   │   ├── e2e_faster_rcnn_R-50-C4_1x.yaml
│   │   ├── e2e_faster_rcnn_R-50-C4_2x.yaml
│   │   ├── e2e_faster_rcnn_R-50-FPN_1x.yaml
│   │   ├── e2e_faster_rcnn_R-50-FPN_2x.yaml
│   │   ├── e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml
│   │   ├── e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml
│   │   ├── e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml
│   │   ├── e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml
│   │   ├── e2e_keypoint_rcnn_R-101-FPN_1x.yaml
│   │   ├── e2e_keypoint_rcnn_R-101-FPN_s1x.yaml
│   │   ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml
│   │   ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml
│   │   ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml
│   │   ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml
│   │   ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml
│   │   ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml
│   │   ├── e2e_mask_rcnn_R-101-FPN_1x.yaml
│   │   ├── e2e_mask_rcnn_R-101-FPN_2x.yaml
│   │   ├── e2e_mask_rcnn_R-50-C4_1x.yaml
│   │   ├── e2e_mask_rcnn_R-50-C4_2x.yaml
│   │   ├── e2e_mask_rcnn_R-50-FPN_1x.yaml
│   │   ├── e2e_mask_rcnn_R-50-FPN_2x.yaml
│   │   ├── e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml
│   │   ├── e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml
│   │   ├── e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml
│   │   ├── e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml
│   │   ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml
│   │   ├── fast_rcnn_R-101-FPN_1x.yaml
│   │   ├── fast_rcnn_R-101-FPN_2x.yaml
│   │   ├── fast_rcnn_R-50-C4_1x.yaml
│   │   ├── fast_rcnn_R-50-C4_2x.yaml
│   │   ├── fast_rcnn_R-50-FPN_1x.yaml
│   │   ├── fast_rcnn_R-50-FPN_2x.yaml
│   │   ├── fast_rcnn_X-101-32x8d-FPN_1x.yaml
│   │   ├── fast_rcnn_X-101-32x8d-FPN_2x.yaml
│   │   ├── fast_rcnn_X-101-64x4d-FPN_1x.yaml
│   │   ├── fast_rcnn_X-101-64x4d-FPN_2x.yaml
│   │   ├── keypoint_rcnn_R-101-FPN_1x.yaml
│   │   ├── keypoint_rcnn_R-101-FPN_s1x.yaml
│   │   ├── keypoint_rcnn_R-50-FPN_1x.yaml
│   │   ├── keypoint_rcnn_R-50-FPN_s1x.yaml
│   │   ├── keypoint_rcnn_X-101-32x8d-FPN_1x.yaml
│   │   ├── keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml
│   │   ├── keypoint_rcnn_X-101-64x4d-FPN_1x.yaml
│   │   ├── keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml
│   │   ├── mask_rcnn_R-101-FPN_1x.yaml
│   │   ├── mask_rcnn_R-101-FPN_2x.yaml
│   │   ├── mask_rcnn_R-50-C4_1x.yaml
│   │   ├── mask_rcnn_R-50-C4_2x.yaml
│   │   ├── mask_rcnn_R-50-FPN_1x.yaml
│   │   ├── mask_rcnn_R-50-FPN_2x.yaml
│   │   ├── mask_rcnn_X-101-32x8d-FPN_1x.yaml
│   │   ├── mask_rcnn_X-101-32x8d-FPN_2x.yaml
│   │   ├── mask_rcnn_X-101-64x4d-FPN_1x.yaml
│   │   ├── mask_rcnn_X-101-64x4d-FPN_2x.yaml
│   │   ├── retinanet_R-101-FPN_1x.yaml
│   │   ├── retinanet_R-101-FPN_2x.yaml
│   │   ├── retinanet_R-50-FPN_1x.yaml
│   │   ├── retinanet_R-50-FPN_2x.yaml
│   │   ├── retinanet_X-101-32x8d-FPN_1x.yaml
│   │   ├── retinanet_X-101-32x8d-FPN_2x.yaml
│   │   ├── retinanet_X-101-64x4d-FPN_1x.yaml
│   │   ├── retinanet_X-101-64x4d-FPN_2x.yaml
│   │   ├── rpn_R-101-FPN_1x.yaml
│   │   ├── rpn_R-50-C4_1x.yaml
│   │   ├── rpn_R-50-FPN_1x.yaml
│   │   ├── rpn_X-101-32x8d-FPN_1x.yaml
│   │   ├── rpn_X-101-64x4d-FPN_1x.yaml
│   │   ├── rpn_person_only_R-101-FPN_1x.yaml
│   │   ├── rpn_person_only_R-50-FPN_1x.yaml
│   │   ├── rpn_person_only_X-101-32x8d-FPN_1x.yaml
│   │   └── rpn_person_only_X-101-64x4d-FPN_1x.yaml
│   ├── getting_started/
│   │   ├── tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml
│   │   ├── tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml
│   │   ├── tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml
│   │   └── tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml
│   └── test_time_aug/
│       ├── e2e_mask_rcnn_R-50-FPN_2x.yaml
│       └── keypoint_rcnn_R-50-FPN_1x.yaml
├── demo/
│   └── NOTICE
├── detectron/
│   ├── __init__.py
│   ├── core/
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── rpn_generator.py
│   │   ├── test.py
│   │   ├── test_engine.py
│   │   └── test_retinanet.py
│   ├── datasets/
│   │   ├── VOCdevkit-matlab-wrapper/
│   │   │   ├── get_voc_opts.m
│   │   │   ├── voc_eval.m
│   │   │   └── xVOCap.m
│   │   ├── __init__.py
│   │   ├── cityscapes_json_dataset_evaluator.py
│   │   ├── coco_to_cityscapes_id.py
│   │   ├── data/
│   │   │   └── README.md
│   │   ├── dataset_catalog.py
│   │   ├── dummy_datasets.py
│   │   ├── json_dataset.py
│   │   ├── json_dataset_evaluator.py
│   │   ├── roidb.py
│   │   ├── task_evaluation.py
│   │   ├── voc_dataset_evaluator.py
│   │   └── voc_eval.py
│   ├── modeling/
│   │   ├── FPN.py
│   │   ├── ResNet.py
│   │   ├── VGG16.py
│   │   ├── VGG_CNN_M_1024.py
│   │   ├── __init__.py
│   │   ├── detector.py
│   │   ├── fast_rcnn_heads.py
│   │   ├── generate_anchors.py
│   │   ├── keypoint_rcnn_heads.py
│   │   ├── mask_rcnn_heads.py
│   │   ├── model_builder.py
│   │   ├── name_compat.py
│   │   ├── optimizer.py
│   │   ├── retinanet_heads.py
│   │   ├── rfcn_heads.py
│   │   └── rpn_heads.py
│   ├── ops/
│   │   ├── __init__.py
│   │   ├── collect_and_distribute_fpn_rpn_proposals.py
│   │   ├── generate_proposal_labels.py
│   │   ├── generate_proposals.py
│   │   ├── zero_even_op.cc
│   │   ├── zero_even_op.cu
│   │   └── zero_even_op.h
│   ├── roi_data/
│   │   ├── __init__.py
│   │   ├── data_utils.py
│   │   ├── fast_rcnn.py
│   │   ├── keypoint_rcnn.py
│   │   ├── loader.py
│   │   ├── mask_rcnn.py
│   │   ├── minibatch.py
│   │   ├── retinanet.py
│   │   └── rpn.py
│   ├── tests/
│   │   ├── data_loader_benchmark.py
│   │   ├── test_batch_permutation_op.py
│   │   ├── test_bbox_transform.py
│   │   ├── test_cfg.py
│   │   ├── test_loader.py
│   │   ├── test_restore_checkpoint.py
│   │   ├── test_smooth_l1_loss_op.py
│   │   ├── test_spatial_narrow_as_op.py
│   │   └── test_zero_even_op.py
│   └── utils/
│       ├── __init__.py
│       ├── blob.py
│       ├── boxes.py
│       ├── c2.py
│       ├── collections.py
│       ├── colormap.py
│       ├── coordinator.py
│       ├── cython_bbox.pyx
│       ├── cython_nms.pyx
│       ├── env.py
│       ├── image.py
│       ├── io.py
│       ├── keypoints.py
│       ├── logging.py
│       ├── lr_policy.py
│       ├── model_convert_utils.py
│       ├── net.py
│       ├── segms.py
│       ├── subprocess.py
│       ├── timer.py
│       ├── train.py
│       ├── training_stats.py
│       └── vis.py
├── docker/
│   └── Dockerfile
├── projects/
│   └── GN/
│       └── README.md
├── requirements.txt
├── setup.py
└── tools/
    ├── convert_cityscapes_to_coco.py
    ├── convert_coco_model_to_cityscapes.py
    ├── convert_pkl_to_pb.py
    ├── convert_selective_search.py
    ├── generate_testdev_from_test.py
    ├── infer.py
    ├── infer_simple.py
    ├── pickle_caffe_blobs.py
    ├── reval.py
    ├── test_net.py
    ├── train_net.py
    └── visualize_results.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/issue_template.md
================================================
## PLEASE FOLLOW THESE INSTRUCTIONS BEFORE POSTING
1. Please thoroughly read README.md, INSTALL.md, GETTING_STARTED.md, and FAQ.md
2. Please search existing *open and closed* issues in case your issue has already been reported
3. Please try to debug the issue in case you can solve it on your own before posting

## After following steps 1-3 above and agreeing to provide the detailed information requested below, you may continue with posting your issue
(**Delete this line and the text above it.**)

### Expected results

What did you expect to see?

### Actual results

What did you observe instead?

### Detailed steps to reproduce

E.g.:

```
The command that you ran
```

### System information

* Operating system: ?
* Compiler version: ?
* CUDA version: ?
* cuDNN version: ?
* NVIDIA driver version: ?
* GPU models (for all devices if they are not all the same): ?
* `PYTHONPATH` environment variable: ?
* `python --version` output: ?
* Anything else that seems relevant: ?


================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Shared objects
*.so

# Distribution / packaging
build/
*.egg-info/
*.egg

# Temporary files
*.swn
*.swo
*.swp

# Dataset symlinks
detectron/datasets/data/*
!detectron/datasets/data/README.md

# Generated C files
detectron/utils/cython_*.c


================================================
FILE: CMakeLists.txt
================================================
# Build script for Detectron's custom Caffe2 operators.
#
# Sources under detectron/ops/ are compiled into a shared CPU library
# (caffe2_detectron_custom_ops) and, when Caffe2 reports CUDA support, into an
# additional shared GPU library (caffe2_detectron_custom_ops_gpu). Both
# libraries are installed into the `lib` directory of the install prefix.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)

# Find the Caffe2 package.
# Caffe2 exports the required targets, so find_package should work for
# the standard Caffe2 installation. If you encounter problems with finding
# the Caffe2 package, make sure you have run `make install` when installing
# Caffe2 (`make install` populates your share/cmake/Caffe2).
find_package(Caffe2 REQUIRED)

# NOTE(review): ${CAFFE2_VERSION} is expanded unquoted here; presumably
# find_package(Caffe2) always defines it. Confirm, because an empty expansion
# would leave this if() without a left-hand operand.
if (${CAFFE2_VERSION} VERSION_LESS 0.8.2)
  # Pre-0.8.2 caffe2 does not have proper interface libraries set up, so we
  # will rely on the old path.
  message(WARNING
      "You are using an older version of Caffe2 (version " ${CAFFE2_VERSION}
      "). Please consider moving to a newer version.")
  include(cmake/legacy/legacymake.cmake)
  # The legacy script handles the whole build; skip the rest of this file.
  return()
endif()

# Add compiler flags.
# These are appended to whatever flags were already configured, so values
# passed in by the user or the environment are preserved.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC -Wno-narrowing")

# Print configuration summary.
include(cmake/Summary.cmake)
detectron_print_config_summary()

# Collect custom ops sources.
# CPU operator sources are the *.cc files and GPU operator sources are the
# *.cu files directly under detectron/ops/ (the glob is not recursive).
file(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cc)
file(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cu)

# Install custom CPU ops lib.
# Built from the *.cc sources only.
add_library(
    caffe2_detectron_custom_ops SHARED
    ${CUSTOM_OPS_CPU_SRCS})

# Caffe2 headers are needed to compile the ops; PRIVATE keeps them out of the
# target's exported interface.
target_include_directories(
    caffe2_detectron_custom_ops PRIVATE
    ${CAFFE2_INCLUDE_DIRS})

target_link_libraries(caffe2_detectron_custom_ops caffe2_library)
install(TARGETS caffe2_detectron_custom_ops DESTINATION lib)

# Install custom GPU ops lib, if gpu is present.
if (CAFFE2_USE_CUDA OR CAFFE2_FOUND_CUDA)
  # Additional -I prefix is required for CMake versions before commit (< 3.7):
  # https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594
  # NOTE(review): if CAFFE2_INCLUDE_DIRS holds more than one path, only the
  # first entry receives the -I prefix from this expansion. Confirm that this
  # is acceptable for the supported Caffe2 installs.
  list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS})
  # The GPU library is built from both the CPU (*.cc) and GPU (*.cu) sources.
  CUDA_ADD_LIBRARY(
      caffe2_detectron_custom_ops_gpu SHARED
      ${CUSTOM_OPS_CPU_SRCS}
      ${CUSTOM_OPS_GPU_SRCS})

  target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu_library)
  install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib)
endif()


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
  advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
  address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <opensource-conduct@fb.com>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Detectron
We want to make contributing to this project as easy and transparent as
possible.

## Our Development Process
Minor changes and improvements will be released on an ongoing basis. Larger
changes (e.g., changesets implementing a new paper) will be released on a more
periodic basis.

## Pull Requests
We actively welcome your pull requests.

1. Fork the repo and create your branch from `main`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. Ensure no regressions in baseline model speed and accuracy.
7. If you haven't already, complete the Contributor License Agreement ("CLA").

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues
GitHub issues will be largely unattended and are mainly intended as a community
forum for collectively debugging issues, hopefully leading to pull requests with
fixes when appropriate.

## Coding Style  
* 4 spaces for indentation rather than tabs
* 80 character line length
* PEP8 formatting

## License
By contributing to Detectron, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.


================================================
FILE: FAQ.md
================================================
# FAQ

This document covers frequently asked questions.

- For general information about Detectron, please see [`README.md`](README.md).
- For installation instructions, please see [`INSTALL.md`](INSTALL.md).
- For a quick getting started guide, please see [`GETTING_STARTED.md`](GETTING_STARTED.md).

#### Q: How do I compute validation AP during training?

**A:** Detectron does not compute validation statistics (e.g., AP) during training because this slows training. Instead, we've implemented a "validation monitor", which is a process that polls for new model checkpoints saved by a training job and when one is found performs inference with it by scheduling a job with `tools/test_net.py` asynchronously using free GPUs in our cluster. We have not released the validation monitor because (1) it's a relatively thin wrapper on top of `tools/test_net.py` and (2) the little code that comprises it is specific to our cluster and would not be generally useful.

#### Q: How do I restrict Detectron to use only a subset of the GPUs on a server?

**A:** Don't modify the code; use the [`CUDA_VISIBLE_DEVICES`](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) environment variable instead.

#### Q: Detection on one image is really slow compared to the reported performance, why?

**A:** Various algorithms and caches (e.g., from `cudnn`) take some time to warm up. Peak inference performance will not be reached until after a few images have been processed.

Also potentially relevant: inference with Mask R-CNN on high-resolution images may be slow simply because substantial time is spent upsampling the predicted masks to the original image resolution (this has not been optimized). You can diagnose this issue if the `misc_mask` time reported by `tools/infer_simple.py` is high (e.g., much more than 20-90ms). The solution is to first resize your images such that the short side is around 600-800px (the exact choice does not matter) and then run inference on the resized image.


#### Q: How do I implement a custom Caffe2 CPU or GPU operator for use in Detectron?

**A:** Detectron uses a number of specialized Caffe2 operators that are distributed via the [Caffe2 Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron) as part of the core Caffe2 GitHub repository. If you'd like to implement a custom Caffe2 operator for your project, we have written a toy example illustrating how to add an operator under the Detectron source tree; please see [`detectron/ops/zero_even_op.*`](detectron/ops/) and [`detectron/tests/test_zero_even_op.py`](detectron/tests/test_zero_even_op.py). For more background on writing Caffe2 operators please consult the [Caffe2 documentation](https://caffe2.ai/docs/custom-operators.html).

#### Q: How do I use Detectron to train a model on a custom dataset?

**A:** If possible, we strongly recommend that you first convert the custom dataset annotation format to the [COCO API json format](http://cocodataset.org/#download). Then, add your dataset to the [dataset catalog](detectron/datasets/dataset_catalog.py) so that Detectron can use it for training and inference. If your dataset cannot be converted to the COCO API json format, then it's likely that more significant code modifications will be required. If the dataset you're adding is popular, please consider making the converted annotations publicly available; if code modifications are required, please consider submitting a pull request.


================================================
FILE: GETTING_STARTED.md
================================================
# Using Detectron

This document provides brief tutorials covering Detectron for inference and training on the COCO dataset.

- For general information about Detectron, please see [`README.md`](README.md).
- For installation instructions, please see [`INSTALL.md`](INSTALL.md).

## Inference with Pretrained Models

#### 1. Directory of Image Files
To run inference on a directory of image files (`demo/*.jpg` in this example), you can use the `infer_simple.py` tool. In this example, we're using an end-to-end trained Mask R-CNN model with a ResNet-101-FPN backbone from the model zoo:

```
python tools/infer_simple.py \
    --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \
    --output-dir /tmp/detectron-visualizations \
    --image-ext jpg \
    --wts https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \
    demo
```

Detectron should automatically download the model from the URL specified by the `--wts` argument. This tool will output visualizations of the detections in PDF format in the directory specified by `--output-dir`. Here's an example of the output you should expect to see (for copyright information about the demo images see [`demo/NOTICE`](demo/NOTICE)).

<div align="center">
  <img src="demo/output/17790319373_bd19b24cfc_k_example_output.jpg" width="700px" />
  <p>Example Mask R-CNN output.</p>
</div>

**Notes:**

- When running inference on your own high-resolution images, Mask R-CNN may be slow simply because substantial time is spent upsampling the predicted masks to the original image resolution (this has not been optimized). You can diagnose this issue if the `misc_mask` time reported by `tools/infer_simple.py` is high (e.g., much more than 20-90ms). The solution is to first resize your images such that the short side is around 600-800px (the exact choice does not matter) and then run inference on the resized image.


#### 2. COCO Dataset
This example shows how to run an end-to-end trained Mask R-CNN model from the model zoo using a single GPU for inference. As configured, this will run inference on all images in `coco_2014_minival` (which must be properly installed).

```
python tools/test_net.py \
    --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \
    TEST.WEIGHTS https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \
    NUM_GPUS 1
```

Running inference with the same model using `$N` GPUs (e.g., `N=8`).

```
python tools/test_net.py \
    --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \
    --multi-gpu-testing \
    TEST.WEIGHTS https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \
    NUM_GPUS $N
```

On an NVIDIA Tesla P100 GPU, inference should take about 130-140 ms per image for this example.


## Training a Model with Detectron

This is a tiny tutorial showing how to train a model on COCO. The model will be an end-to-end trained Faster R-CNN using a ResNet-50-FPN backbone. For the purpose of this tutorial, we'll use a short training schedule and a small input image size so that training and inference will be relatively fast. As a result, the box AP on COCO will be relatively low compared to our [baselines](MODEL_ZOO.md). This example is provided for instructive purposes only (i.e., not for comparing against publications).

#### 1. Training with 1 GPU

```
python tools/train_net.py \
    --cfg configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml \
    OUTPUT_DIR /tmp/detectron-output
```

**Expected results:**

- Output (models, validation set detections, etc.) will be saved under `/tmp/detectron-output`
- On a Maxwell generation GPU (e.g., M40), training should take around 4.2 hours
- Inference time should be around 80ms / image (also on an M40)
- Box AP on `coco_2014_minival` should be around 22.1% (+/- 0.1% stdev measured over 3 runs)

#### 2. Multi-GPU Training

We've also provided configs to illustrate training with 2, 4, and 8 GPUs using learning schedules that will be approximately equivalent to the one used with 1 GPU above. The configs are located at: `configs/getting_started/tutorial_{2,4,8}gpu_e2e_faster_rcnn_R-50-FPN.yaml`. For example, launching a training job with 2 GPUs will look like this:

```
python tools/train_net.py \
    --multi-gpu-testing \
    --cfg configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml \
    OUTPUT_DIR /tmp/detectron-output
```

Note that we've also added the `--multi-gpu-testing` flag to instruct Detectron to parallelize inference over multiple GPUs (2 in this example; see `NUM_GPUS` in the config file) after training has finished.

**Expected results:**

- Training should take around 2.3 hours (2 x M40)
- Inference time should be around 80ms / image (but in parallel on 2 GPUs, so half the total time)
- Box AP on `coco_2014_minival` should be around 22.1% (+/- 0.1% stdev measured over 3 runs)

To understand how learning schedules are adjusted (the "linear scaling rule"), please study these tutorial config files and read our paper [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677). **Aside from this tutorial, all of our released configs make use of 8 GPUs. If you will be using fewer than 8 GPUs for training (or do anything else that changes the minibatch size), it is essential that you understand how to manipulate training schedules according to the linear scaling rule.**

**Notes:**

- This training example uses a relatively low GPU-compute model and thus overhead from Caffe2 Python ops is relatively high. As a result, scaling as the number of GPUs is increased from 2 to 8 is relatively poor (e.g., training with 8 GPUs takes about 0.9 hours, only 4.5x faster than with 1 GPU). As larger, more GPU-compute heavy models are used, the scaling improves.


================================================
FILE: INSTALL.md
================================================
# Installing Detectron

This document covers how to install Detectron, its dependencies (including Caffe2), and the COCO dataset.

- For general information about Detectron, please see [`README.md`](README.md).

**Requirements:**

- NVIDIA GPU, Linux, Python2
- Caffe2, various standard Python packages, and the COCO API; instructions for installing these dependencies are found below

**Notes:**

- Detectron operators currently do not have CPU implementations; a GPU system is required.
- Detectron has been tested extensively with CUDA 8.0 and cuDNN 6.0.21.

## Caffe2

To install Caffe2 with CUDA support, follow the [installation instructions](https://caffe2.ai/docs/getting-started.html) from the [Caffe2 website](https://caffe2.ai/). **If you already have Caffe2 installed, make sure to update your Caffe2 to a version that includes the [Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron).**

Please ensure that your Caffe2 installation was successful before proceeding by running the following commands and checking their output as directed in the comments.

```
# To check if Caffe2 build was successful
python -c 'from caffe2.python import core' 2>/dev/null && echo "Success" || echo "Failure"

# To check if Caffe2 GPU build was successful
# This must print a number > 0 in order to use Detectron
python -c 'from caffe2.python import workspace; print(workspace.NumCudaDevices())'
```

If the `caffe2` Python package is not found, you likely need to adjust your `PYTHONPATH` environment variable to include its location (`/path/to/caffe2/build`, where `build` is the Caffe2 CMake build directory).

## Other Dependencies

Install the [COCO API](https://github.com/cocodataset/cocoapi):

```
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python setup.py install --user
```

Note that instructions like `# COCOAPI=/path/to/clone/cocoapi` indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (`COCOAPI` in this case) accordingly.

## Detectron

Clone the Detectron repository:

```
# DETECTRON=/path/to/clone/detectron
git clone https://github.com/facebookresearch/detectron $DETECTRON
```

Install Python dependencies:

```
pip install -r $DETECTRON/requirements.txt
```

Set up Python modules:

```
cd $DETECTRON && make
```

Check that Detectron tests pass (e.g. for [`SpatialNarrowAsOp test`](detectron/tests/test_spatial_narrow_as_op.py)):

```
python $DETECTRON/detectron/tests/test_spatial_narrow_as_op.py
```

## That's All You Need for Inference

At this point, you can run inference using pretrained Detectron models. Take a look at our [inference tutorial](GETTING_STARTED.md) for an example. If you want to train models on the COCO dataset, then please continue with the installation instructions.

## Datasets

Detectron finds datasets via symlinks from `detectron/datasets/data` to the actual locations where the dataset images and annotations are stored. For instructions on how to create symlinks for COCO and other datasets, please see [`detectron/datasets/data/README.md`](detectron/datasets/data/README.md).

After symlinks have been created, that's all you need to start training models.

## Advanced Topic: Custom Operators for New Research Projects

Please read the custom operators section of the [`FAQ`](FAQ.md) first.

For convenience, we provide CMake support for building custom operators. All custom operators are built into a single library that can be loaded dynamically from Python.
Place your custom operator implementation under [`detectron/ops/`](detectron/ops/) and see [`detectron/tests/test_zero_even_op.py`](detectron/tests/test_zero_even_op.py) for an example of how to load custom operators from Python.

Build the custom operators library:

```
cd $DETECTRON && make ops
```

Check that the custom operator tests pass:

```
python $DETECTRON/detectron/tests/test_zero_even_op.py
```

## Docker Image

We provide a [`Dockerfile`](docker/Dockerfile) that you can use to build a Detectron image on top of a Caffe2 image that satisfies the requirements outlined at the top. If you would like to use a Caffe2 image different from the one we use by default, please make sure that it includes the [Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron).

Build the image:

```
cd $DETECTRON/docker
docker build -t detectron:c2-cuda9-cudnn7 .
```

Run the image (e.g. for [`BatchPermutationOp test`](detectron/tests/test_batch_permutation_op.py)):

```
nvidia-docker run --rm -it detectron:c2-cuda9-cudnn7 python detectron/tests/test_batch_permutation_op.py
```

## Troubleshooting

In case of Caffe2 installation problems, please read the troubleshooting section of the relevant Caffe2 [installation instructions](https://caffe2.ai/docs/getting-started.html) first. In the following, we provide additional troubleshooting tips for Caffe2 and Detectron.

### Caffe2 Operator Profiling

Caffe2 comes with performance [`profiling`](https://github.com/pytorch/pytorch/tree/master/caffe2/contrib/prof)
support which you may find useful for benchmarking or debugging your operators
(see [`BatchPermutationOp test`](detectron/tests/test_batch_permutation_op.py) for example usage).
Profiling support is not built by default and you can enable it by setting
the `-DUSE_PROF=ON` flag when running Caffe2 CMake.

### CMake Cannot Find CUDA and cuDNN

Sometimes CMake has trouble with finding CUDA and cuDNN dirs on your machine.

When building Caffe2, you can point CMake to CUDA and cuDNN dirs by running:

```
cmake .. \
  # insert your Caffe2 CMake flags here
  -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda/toolkit/dir \
  -DCUDNN_ROOT_DIR=/path/to/cudnn/root/dir
```

Similarly, when building custom Detectron operators you can use:

```
cd $DETECTRON
mkdir -p build && cd build
cmake .. \
  -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda/toolkit/dir \
  -DCUDNN_ROOT_DIR=/path/to/cudnn/root/dir
make
```

Note that you can use the same commands to get CMake to use specific versions of CUDA and cuDNN out of possibly multiple versions installed on your machine.

### Protobuf Errors

Caffe2 uses protobuf as its serialization format and requires version `3.2.0` or newer.
If your protobuf version is older, you can build protobuf from the Caffe2 protobuf submodule and use that version instead.

To build Caffe2 protobuf submodule:

```
# CAFFE2=/path/to/caffe2
cd $CAFFE2/third_party/protobuf/cmake
mkdir -p build && cd build
cmake .. \
  -DCMAKE_INSTALL_PREFIX=$HOME/c2_tp_protobuf \
  -Dprotobuf_BUILD_TESTS=OFF \
  -DCMAKE_CXX_FLAGS="-fPIC"
make install
```

To point Caffe2 CMake to the newly built protobuf:

```
cmake .. \
  # insert your Caffe2 CMake flags here
  -DPROTOBUF_PROTOC_EXECUTABLE=$HOME/c2_tp_protobuf/bin/protoc \
  -DPROTOBUF_INCLUDE_DIR=$HOME/c2_tp_protobuf/include \
  -DPROTOBUF_LIBRARY=$HOME/c2_tp_protobuf/lib64/libprotobuf.a
```

You may also experience problems with protobuf if you have both system and anaconda packages installed.
This could lead to problems as the versions could be mixed at compile time or at runtime.
This issue can also be overcome by following the commands from above.

### Caffe2 Python Binaries

In case you experience issues with CMake being unable to find the required Python paths when
building Caffe2 Python binaries (e.g. in virtualenv), you can try pointing Caffe2 CMake to python
library and include dir by using:

```
cmake .. \
  # insert your Caffe2 CMake flags here
  -DPYTHON_LIBRARY=$(python -c "from distutils import sysconfig; print(sysconfig.get_python_lib())") \
  -DPYTHON_INCLUDE_DIR=$(python -c "from distutils import sysconfig; print(sysconfig.get_python_inc())")
```

### Caffe2 with NNPACK Build

Detectron does not require Caffe2 built with NNPACK support. If you face NNPACK related issues during Caffe2 installation, you can safely disable NNPACK by setting the `-DUSE_NNPACK=OFF` CMake flag.

### Caffe2 with OpenCV Build

Analogously to the NNPACK case above, you can disable OpenCV by setting the `-DUSE_OPENCV=OFF` CMake flag.

### COCO API Undefined Symbol Error

If you encounter a COCO API import error due to an undefined symbol, as reported [here](https://github.com/cocodataset/cocoapi/issues/35),
make sure that your python versions are not getting mixed. For instance, this issue may arise if you have
[both system and conda numpy installed](https://stackoverflow.com/questions/36190757/numpy-undefined-symbol-pyfpe-jbuf).

### CMake Cannot Find Caffe2

In case you experience issues with CMake being unable to find the Caffe2 package when building custom operators,
make sure you have run `make install` as part of your Caffe2 installation process.


================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!)  The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


================================================
FILE: MODEL_ZOO.md
================================================
# Detectron Model Zoo and Baselines

## Introduction

This file documents a large collection of baselines trained with Detectron, primarily in late December 2017. We refer to these results as the *12_2017_baselines*. All configurations for these baselines are located in the `configs/12_2017_baselines` directory. The tables below provide results and useful statistics about training and inference. Links to the trained models as well as their output are provided. Unless noted differently below (see "Notes" under each table), the following common settings are used for all training and inference runs.

#### Common Settings and Notes

- All baselines were run on [Big Basin](https://code.facebook.com/posts/1835166200089399/introducing-big-basin) servers with 8 NVIDIA Tesla P100 GPU accelerators (with 16GB GPU memory, CUDA 8.0, and cuDNN 6.0.21).
- All baselines were trained using 8 GPU data parallel sync SGD with a minibatch size of either 8 or 16 images (see the *im/gpu* column).
- For training, only horizontal flipping data augmentation was used.
- For inference, no test-time augmentations (e.g., multiple scales, flipping) were used.
- All models were trained on the union of `coco_2014_train` and `coco_2014_valminusminival`, which is exactly equivalent to the recently defined `coco_2017_train` dataset.
- All models were tested on the `coco_2014_minival` dataset, which is exactly equivalent to the recently defined `coco_2017_val` dataset.
- Inference times are often expressed as "*X* + *Y*", in which *X* is time taken in reasonably well-optimized GPU code and *Y* is time taken in unoptimized CPU code. (The CPU code time could be reduced substantially with additional engineering.)
- Inference results for boxes, masks, and keypoints ("kps") are provided in the [COCO json format](http://cocodataset.org/#format-data).
- The *model id* column is provided for ease of reference.
- To check downloaded file integrity: for any download URL on this page, simply append `.md5sum` to the URL to download the file's md5 hash.
- All models and results below are on the [COCO dataset](http://cocodataset.org).
- Baseline models and results for the [Cityscapes dataset](https://www.cityscapes-dataset.com/) are coming soon!

#### Training Schedules

We use three training schedules, indicated by the *lr schd* column in the tables below.

- **1x**: For minibatch size 16, this schedule starts at a LR of 0.02, multiplies the LR by 0.1 after 60k and again after 80k iterations, and finally terminates at 90k iterations. This schedule results in 12.17 epochs over the 118,287 images in `coco_2014_train` union `coco_2014_valminusminival` (or equivalently, `coco_2017_train`).
- **2x**: Twice as long as the 1x schedule with the LR change points scaled proportionally.
- **s1x** ("stretched 1x"): This schedule scales the 1x schedule by roughly 1.44x, but also extends the duration of the first learning rate. With a minibatch size of 16, it multiplies the LR by 0.1 at 100k and again at 120k iterations, finally ending after 130k iterations.

All training schedules also use a 500 iteration linear learning rate warm up. When changing the minibatch size between 8 and 16 images, we adjust the number of SGD iterations and the base learning rate according to the principles outlined in our paper [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677).

#### License

All models available for download through this document are licensed under the [Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/).

#### ImageNet Pretrained Models

The backbone models pretrained on ImageNet are available in the format used by Detectron. Unless otherwise noted, these models are trained on the standard ImageNet-1k dataset.

- [R-50.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl): converted copy of MSRA's original ResNet-50 model
- [R-101.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl): converted copy of MSRA's original ResNet-101 model
- [X-101-64x4d.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl): converted copy of FB's original ResNeXt-101-64x4d model trained with Torch7
- [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB
- [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl): ResNeXt-152-32x8d model **trained on ImageNet-5k** with Caffe2 at FB (see our [ResNeXt paper](https://arxiv.org/abs/1611.05431) for details on ImageNet-5k)

#### Log Files

[Training and inference logs](https://dl.fbaipublicfiles.com/detectron/logs/model_zoo_12_2017_baseline_logs.tgz) are available for most models in the model zoo.

## Proposal, Box, and Mask Detection Baselines

### RPN Proposal Baselines

<table><tbody>
<!-- START RPN TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>kp<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>prop.<br/>AR</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>4.3</sub></sup></td>
<td align="right"><sup><sub>0.187</sub></sup></td>
<td align="right"><sup><sub>4.7</sub></sup></td>
<td align="right"><sup><sub>0.113</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>51.6</sub></sup></td>
<td align="right"><sup><sub>35998355</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/train/coco_2014_train%3Acoco_2014_valminusminival/rpn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>6.4</sub></sup></td>
<td align="right"><sup><sub>0.416</sub></sup></td>
<td align="right"><sup><sub>10.4</sub></sup></td>
<td align="right"><sup><sub>0.080</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>57.2</sub></sup></td>
<td align="right"><sup><sub>35998814</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.1</sub></sup></td>
<td align="right"><sup><sub>0.503</sub></sup></td>
<td align="right"><sup><sub>12.6</sub></sup></td>
<td align="right"><sup><sub>0.108</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>58.2</sub></sup></td>
<td align="right"><sup><sub>35998887</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>11.5</sub></sup></td>
<td align="right"><sup><sub>1.395</sub></sup></td>
<td align="right"><sup><sub>34.9</sub></sup></td>
<td align="right"><sup><sub>0.292</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>59.4</sub></sup></td>
<td align="right"><sup><sub>35998956</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>11.6</sub></sup></td>
<td align="right"><sup><sub>1.102</sub></sup></td>
<td align="right"><sup><sub>27.6</sub></sup></td>
<td align="right"><sup><sub>0.222</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>59.5</sub></sup></td>
<td align="right"><sup><sub>36760102</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<!-- END RPN TABLE -->
</tbody></table>

**Notes:**

- Inference time only includes RPN proposal generation.
- "prop. AR" is proposal average recall at 1000 proposals per image.
- Proposal download links ("props"): "1" is `coco_2014_train`; "2" is `coco_2014_valminusminival`; and "3" is `coco_2014_minival`.

### Fast & Mask R-CNN Baselines Using Precomputed RPN Proposals

<table><tbody>
<!-- START 2-STAGE TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>kp<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>prop.<br/>AR</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.0</sub></sup></td>
<td align="right"><sup><sub>0.456</sub></sup></td>
<td align="right"><sup><sub>22.8</sub></sup></td>
<td align="right"><sup><sub>0.241&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>34.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36224013</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36224013/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml.08_22_00.vHd5BeBP/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36224013/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml.08_22_00.vHd5BeBP/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.0</sub></sup></td>
<td align="right"><sup><sub>0.453</sub></sup></td>
<td align="right"><sup><sub>45.3</sub></sup></td>
<td align="right"><sup><sub>0.241&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>35.6</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36224046</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36224046/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml.08_22_57.XFxNqEnL/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36224046/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml.08_22_57.XFxNqEnL/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>6.0</sub></sup></td>
<td align="right"><sup><sub>0.285</sub></sup></td>
<td align="right"><sup><sub>7.1</sub></sup></td>
<td align="right"><sup><sub>0.076&nbsp;+&nbsp;0.004</sub></sup></td>
<td align="right"><sup><sub>36.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36225147</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>6.0</sub></sup></td>
<td align="right"><sup><sub>0.287</sub></sup></td>
<td align="right"><sup><sub>14.4</sub></sup></td>
<td align="right"><sup><sub>0.077&nbsp;+&nbsp;0.004</sub></sup></td>
<td align="right"><sup><sub>36.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36225249</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36225249/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml.08_40_18.zoChak1f/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36225249/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml.08_40_18.zoChak1f/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>7.7</sub></sup></td>
<td align="right"><sup><sub>0.448</sub></sup></td>
<td align="right"><sup><sub>11.2</sub></sup></td>
<td align="right"><sup><sub>0.102&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>38.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36228880</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36228880/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml.09_25_03.tZuHkSpl/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36228880/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml.09_25_03.tZuHkSpl/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>7.7</sub></sup></td>
<td align="right"><sup><sub>0.449</sub></sup></td>
<td align="right"><sup><sub>22.5</sub></sup></td>
<td align="right"><sup><sub>0.103&nbsp;+&nbsp;0.004</sub></sup></td>
<td align="right"><sup><sub>39.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36228933</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36228933/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml.09_26_27.jkOUTrrk/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36228933/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml.09_26_27.jkOUTrrk/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.3</sub></sup></td>
<td align="right"><sup><sub>0.994</sub></sup></td>
<td align="right"><sup><sub>49.7</sub></sup></td>
<td align="right"><sup><sub>0.292&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>40.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36226250</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36226250/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml.08_54_22.u0LaxQsC/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36226250/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml.08_54_22.u0LaxQsC/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.3</sub></sup></td>
<td align="right"><sup><sub>0.980</sub></sup></td>
<td align="right"><sup><sub>98.0</sub></sup></td>
<td align="right"><sup><sub>0.291&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>39.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36226326</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36226326/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml.08_55_54.2F7MP1CD/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36226326/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml.08_55_54.2F7MP1CD/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.4</sub></sup></td>
<td align="right"><sup><sub>0.721</sub></sup></td>
<td align="right"><sup><sub>36.1</sub></sup></td>
<td align="right"><sup><sub>0.217&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>40.6</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37119777</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37119777/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml.06_38_03.d5N36egm/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37119777/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml.06_38_03.d5N36egm/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Fast</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.4</sub></sup></td>
<td align="right"><sup><sub>0.720</sub></sup></td>
<td align="right"><sup><sub>72.0</sub></sup></td>
<td align="right"><sup><sub>0.217&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>39.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37121469</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37121469/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml.07_03_53.EPrHk63L/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37121469/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml.07_03_53.EPrHk63L/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.4</sub></sup></td>
<td align="right"><sup><sub>0.466</sub></sup></td>
<td align="right"><sup><sub>23.3</sub></sup></td>
<td align="right"><sup><sub>0.252&nbsp;+&nbsp;0.020</sub></sup></td>
<td align="right"><sup><sub>35.5</sub></sup></td>
<td align="right"><sup><sub>31.3</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36224121</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36224121/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml.08_24_37.wdU8r5Jo/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36224121/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml.08_24_37.wdU8r5Jo/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36224121/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml.08_24_37.wdU8r5Jo/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.4</sub></sup></td>
<td align="right"><sup><sub>0.464</sub></sup></td>
<td align="right"><sup><sub>46.4</sub></sup></td>
<td align="right"><sup><sub>0.253&nbsp;+&nbsp;0.019</sub></sup></td>
<td align="right"><sup><sub>36.9</sub></sup></td>
<td align="right"><sup><sub>32.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36224151</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36224151/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml.08_25_34.RSN5CVSH/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36224151/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml.08_25_34.RSN5CVSH/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36224151/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml.08_25_34.RSN5CVSH/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>7.9</sub></sup></td>
<td align="right"><sup><sub>0.377</sub></sup></td>
<td align="right"><sup><sub>9.4</sub></sup></td>
<td align="right"><sup><sub>0.082&nbsp;+&nbsp;0.019</sub></sup></td>
<td align="right"><sup><sub>37.3</sub></sup></td>
<td align="right"><sup><sub>33.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36225401</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36225401/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml.08_42_04.MocEgrRW/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36225401/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml.08_42_04.MocEgrRW/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36225401/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml.08_42_04.MocEgrRW/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>7.9</sub></sup></td>
<td align="right"><sup><sub>0.377</sub></sup></td>
<td align="right"><sup><sub>18.9</sub></sup></td>
<td align="right"><sup><sub>0.083&nbsp;+&nbsp;0.018</sub></sup></td>
<td align="right"><sup><sub>37.7</sub></sup></td>
<td align="right"><sup><sub>34.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36225732</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36225732/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml.08_43_08.gDqBz9zS/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36225732/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml.08_43_08.gDqBz9zS/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36225732/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml.08_43_08.gDqBz9zS/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>9.6</sub></sup></td>
<td align="right"><sup><sub>0.539</sub></sup></td>
<td align="right"><sup><sub>13.5</sub></sup></td>
<td align="right"><sup><sub>0.111&nbsp;+&nbsp;0.018</sub></sup></td>
<td align="right"><sup><sub>39.4</sub></sup></td>
<td align="right"><sup><sub>35.6</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36229407</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36229407/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml.09_38_04.zbVPo8ZE/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36229407/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml.09_38_04.zbVPo8ZE/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36229407/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml.09_38_04.zbVPo8ZE/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>9.6</sub></sup></td>
<td align="right"><sup><sub>0.537</sub></sup></td>
<td align="right"><sup><sub>26.9</sub></sup></td>
<td align="right"><sup><sub>0.109&nbsp;+&nbsp;0.016</sub></sup></td>
<td align="right"><sup><sub>40.0</sub></sup></td>
<td align="right"><sup><sub>35.9</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36229740</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36229740/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml.09_39_00.Z7O7zOEC/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36229740/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml.09_39_00.Z7O7zOEC/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36229740/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml.09_39_00.Z7O7zOEC/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.3</sub></sup></td>
<td align="right"><sup><sub>1.036</sub></sup></td>
<td align="right"><sup><sub>51.8</sub></sup></td>
<td align="right"><sup><sub>0.292&nbsp;+&nbsp;0.016</sub></sup></td>
<td align="right"><sup><sub>41.3</sub></sup></td>
<td align="right"><sup><sub>37.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36226382</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36226382/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml.08_56_59.rUCejrBN/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36226382/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml.08_56_59.rUCejrBN/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36226382/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml.08_56_59.rUCejrBN/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.3</sub></sup></td>
<td align="right"><sup><sub>1.035</sub></sup></td>
<td align="right"><sup><sub>103.5</sub></sup></td>
<td align="right"><sup><sub>0.292&nbsp;+&nbsp;0.014</sub></sup></td>
<td align="right"><sup><sub>41.1</sub></sup></td>
<td align="right"><sup><sub>36.6</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36672114</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36672114/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml.08_58_13.aNWCi3U7/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36672114/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml.08_58_13.aNWCi3U7/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36672114/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml.08_58_13.aNWCi3U7/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.4</sub></sup></td>
<td align="right"><sup><sub>0.766</sub></sup></td>
<td align="right"><sup><sub>38.3</sub></sup></td>
<td align="right"><sup><sub>0.223&nbsp;+&nbsp;0.017</sub></sup></td>
<td align="right"><sup><sub>41.3</sub></sup></td>
<td align="right"><sup><sub>37.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37121516</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37121516/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml.07_04_58.CbM22DZg/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37121516/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml.07_04_58.CbM22DZg/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37121516/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml.07_04_58.CbM22DZg/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.4</sub></sup></td>
<td align="right"><sup><sub>0.765</sub></sup></td>
<td align="right"><sup><sub>76.5</sub></sup></td>
<td align="right"><sup><sub>0.222&nbsp;+&nbsp;0.014</sub></sup></td>
<td align="right"><sup><sub>40.7</sub></sup></td>
<td align="right"><sup><sub>36.3</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37121596</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37121596/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml.07_05_48.TL22uFaK/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37121596/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml.07_05_48.TL22uFaK/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37121596/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml.07_05_48.TL22uFaK/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<!-- END 2-STAGE TABLE -->
</tbody></table>

**Notes:**

- Each row uses precomputed RPN proposals from the corresponding table row above that uses the same backbone.
- Inference time *excludes* proposal generation.

### End-to-End Faster & Mask R-CNN Baselines

<table><tbody>
<!-- START E2E FASTER AND MASK TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>kp<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>prop.<br/>AR</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.3</sub></sup></td>
<td align="right"><sup><sub>0.566</sub></sup></td>
<td align="right"><sup><sub>28.3</sub></sup></td>
<td align="right"><sup><sub>0.167&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>34.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35857197</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.3</sub></sup></td>
<td align="right"><sup><sub>0.569</sub></sup></td>
<td align="right"><sup><sub>56.9</sub></sup></td>
<td align="right"><sup><sub>0.174&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>36.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35857281</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35857281/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml.01_34_56.ScPH0Z4r/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35857281/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml.01_34_56.ScPH0Z4r/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>7.2</sub></sup></td>
<td align="right"><sup><sub>0.544</sub></sup></td>
<td align="right"><sup><sub>13.6</sub></sup></td>
<td align="right"><sup><sub>0.093&nbsp;+&nbsp;0.004</sub></sup></td>
<td align="right"><sup><sub>36.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35857345</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>7.2</sub></sup></td>
<td align="right"><sup><sub>0.546</sub></sup></td>
<td align="right"><sup><sub>27.3</sub></sup></td>
<td align="right"><sup><sub>0.092&nbsp;+&nbsp;0.004</sub></sup></td>
<td align="right"><sup><sub>37.9</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35857389</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35857389/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml.01_37_22.KSeq0b5q/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35857389/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml.01_37_22.KSeq0b5q/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.9</sub></sup></td>
<td align="right"><sup><sub>0.647</sub></sup></td>
<td align="right"><sup><sub>16.2</sub></sup></td>
<td align="right"><sup><sub>0.120&nbsp;+&nbsp;0.004</sub></sup></td>
<td align="right"><sup><sub>39.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35857890</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.9</sub></sup></td>
<td align="right"><sup><sub>0.647</sub></sup></td>
<td align="right"><sup><sub>32.4</sub></sup></td>
<td align="right"><sup><sub>0.119&nbsp;+&nbsp;0.004</sub></sup></td>
<td align="right"><sup><sub>39.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35857952</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35857952/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml.01_39_49.JPwJDh92/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35857952/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml.01_39_49.JPwJDh92/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.9</sub></sup></td>
<td align="right"><sup><sub>1.057</sub></sup></td>
<td align="right"><sup><sub>52.9</sub></sup></td>
<td align="right"><sup><sub>0.305&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>41.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35858015</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35858015/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml.01_40_54.1xc565DE/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35858015/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml.01_40_54.1xc565DE/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.9</sub></sup></td>
<td align="right"><sup><sub>1.055</sub></sup></td>
<td align="right"><sup><sub>105.5</sub></sup></td>
<td align="right"><sup><sub>0.304&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>40.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35858198</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35858198/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml.01_41_46.CX2InaoG/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35858198/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml.01_41_46.CX2InaoG/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.0</sub></sup></td>
<td align="right"><sup><sub>0.799</sub></sup></td>
<td align="right"><sup><sub>40.0</sub></sup></td>
<td align="right"><sup><sub>0.233&nbsp;+&nbsp;0.004</sub></sup></td>
<td align="right"><sup><sub>41.3</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36761737</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Faster</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.0</sub></sup></td>
<td align="right"><sup><sub>0.800</sub></sup></td>
<td align="right"><sup><sub>80.0</sub></sup></td>
<td align="right"><sup><sub>0.233&nbsp;+&nbsp;0.003</sub></sup></td>
<td align="right"><sup><sub>40.6</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36761786</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36761786/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml.06_33_22.VqFNuxk6/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36761786/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml.06_33_22.VqFNuxk6/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.6</sub></sup></td>
<td align="right"><sup><sub>0.620</sub></sup></td>
<td align="right"><sup><sub>31.0</sub></sup></td>
<td align="right"><sup><sub>0.181&nbsp;+&nbsp;0.018</sub></sup></td>
<td align="right"><sup><sub>35.8</sub></sup></td>
<td align="right"><sup><sub>31.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35858791</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-C4</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>6.6</sub></sup></td>
<td align="right"><sup><sub>0.620</sub></sup></td>
<td align="right"><sup><sub>62.0</sub></sup></td>
<td align="right"><sup><sub>0.182&nbsp;+&nbsp;0.017</sub></sup></td>
<td align="right"><sup><sub>37.8</sub></sup></td>
<td align="right"><sup><sub>32.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35858828</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35858828/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml.01_46_47.HBThTerB/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35858828/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml.01_46_47.HBThTerB/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35858828/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml.01_46_47.HBThTerB/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.6</sub></sup></td>
<td align="right"><sup><sub>0.889</sub></sup></td>
<td align="right"><sup><sub>22.2</sub></sup></td>
<td align="right"><sup><sub>0.099&nbsp;+&nbsp;0.019</sub></sup></td>
<td align="right"><sup><sub>37.7</sub></sup></td>
<td align="right"><sup><sub>33.9</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35858933</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.6</sub></sup></td>
<td align="right"><sup><sub>0.897</sub></sup></td>
<td align="right"><sup><sub>44.9</sub></sup></td>
<td align="right"><sup><sub>0.099&nbsp;+&nbsp;0.018</sub></sup></td>
<td align="right"><sup><sub>38.6</sub></sup></td>
<td align="right"><sup><sub>34.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35859007</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>10.2</sub></sup></td>
<td align="right"><sup><sub>1.008</sub></sup></td>
<td align="right"><sup><sub>25.2</sub></sup></td>
<td align="right"><sup><sub>0.126&nbsp;+&nbsp;0.018</sub></sup></td>
<td align="right"><sup><sub>40.0</sub></sup></td>
<td align="right"><sup><sub>35.9</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35861795</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>10.2</sub></sup></td>
<td align="right"><sup><sub>0.993</sub></sup></td>
<td align="right"><sup><sub>49.7</sub></sup></td>
<td align="right"><sup><sub>0.126&nbsp;+&nbsp;0.017</sub></sup></td>
<td align="right"><sup><sub>40.9</sub></sup></td>
<td align="right"><sup><sub>36.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35861858</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.6</sub></sup></td>
<td align="right"><sup><sub>1.217</sub></sup></td>
<td align="right"><sup><sub>60.9</sub></sup></td>
<td align="right"><sup><sub>0.309&nbsp;+&nbsp;0.018</sub></sup></td>
<td align="right"><sup><sub>42.4</sub></sup></td>
<td align="right"><sup><sub>37.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36494496</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36494496/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml.07_50_11.fkwVtEvg/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36494496/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml.07_50_11.fkwVtEvg/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36494496/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml.07_50_11.fkwVtEvg/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.6</sub></sup></td>
<td align="right"><sup><sub>1.210</sub></sup></td>
<td align="right"><sup><sub>121.0</sub></sup></td>
<td align="right"><sup><sub>0.309&nbsp;+&nbsp;0.015</sub></sup></td>
<td align="right"><sup><sub>42.2</sub></sup></td>
<td align="right"><sup><sub>37.2</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>35859745</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35859745/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml.02_00_30.ESWbND2w/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35859745/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml.02_00_30.ESWbND2w/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35859745/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml.02_00_30.ESWbND2w/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.7</sub></sup></td>
<td align="right"><sup><sub>0.961</sub></sup></td>
<td align="right"><sup><sub>48.1</sub></sup></td>
<td align="right"><sup><sub>0.239&nbsp;+&nbsp;0.019</sub></sup></td>
<td align="right"><sup><sub>42.1</sub></sup></td>
<td align="right"><sup><sub>37.3</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36761843</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>7.7</sub></sup></td>
<td align="right"><sup><sub>0.975</sub></sup></td>
<td align="right"><sup><sub>97.5</sub></sup></td>
<td align="right"><sup><sub>0.240&nbsp;+&nbsp;0.016</sub></sup></td>
<td align="right"><sup><sub>41.7</sub></sup></td>
<td align="right"><sup><sub>36.9</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36762092</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36762092/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml.06_37_59.DM5gJYRF/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36762092/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml.06_37_59.DM5gJYRF/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36762092/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml.06_37_59.DM5gJYRF/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<!-- END E2E FASTER AND MASK TABLE -->
</tbody></table>

**Notes:**

- For these models, RPN and the detector are trained jointly and end-to-end.
- Inference time is fully image-to-detections, *including* proposal generation.


### RetinaNet Baselines

<table><tbody>
<!-- START RETINANET TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>kp<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>prop.<br/>AR</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>RetinaNet</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>6.8</sub></sup></td>
<td align="right"><sup><sub>0.483</sub></sup></td>
<td align="right"><sup><sub>12.1</sub></sup></td>
<td align="right"><sup><sub>0.125</sub></sup></td>
<td align="right"><sup><sub>35.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36768636</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36768636/12_2017_baselines/retinanet_R-50-FPN_1x.yaml.08_29_48.t4zc9clc/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36768636/12_2017_baselines/retinanet_R-50-FPN_1x.yaml.08_29_48.t4zc9clc/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>RetinaNet</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>6.8</sub></sup></td>
<td align="right"><sup><sub>0.482</sub></sup></td>
<td align="right"><sup><sub>24.1</sub></sup></td>
<td align="right"><sup><sub>0.127</sub></sup></td>
<td align="right"><sup><sub>35.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36768677</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36768677/12_2017_baselines/retinanet_R-50-FPN_2x.yaml.08_30_38.sgZIQZQ5/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36768677/12_2017_baselines/retinanet_R-50-FPN_2x.yaml.08_30_38.sgZIQZQ5/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>RetinaNet</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.7</sub></sup></td>
<td align="right"><sup><sub>0.666</sub></sup></td>
<td align="right"><sup><sub>16.7</sub></sup></td>
<td align="right"><sup><sub>0.156</sub></sup></td>
<td align="right"><sup><sub>37.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36768744</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36768744/12_2017_baselines/retinanet_R-101-FPN_1x.yaml.08_31_38.5poQe1ZB/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36768744/12_2017_baselines/retinanet_R-101-FPN_1x.yaml.08_31_38.5poQe1ZB/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>RetinaNet</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.7</sub></sup></td>
<td align="right"><sup><sub>0.666</sub></sup></td>
<td align="right"><sup><sub>33.3</sub></sup></td>
<td align="right"><sup><sub>0.154</sub></sup></td>
<td align="right"><sup><sub>37.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36768840</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36768840/12_2017_baselines/retinanet_R-101-FPN_2x.yaml.08_33_29.grtM0RTf/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36768840/12_2017_baselines/retinanet_R-101-FPN_2x.yaml.08_33_29.grtM0RTf/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>RetinaNet</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.6</sub></sup></td>
<td align="right"><sup><sub>1.613</sub></sup></td>
<td align="right"><sup><sub>40.3</sub></sup></td>
<td align="right"><sup><sub>0.341</sub></sup></td>
<td align="right"><sup><sub>39.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36768875</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36768875/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml.08_34_37.FSXgMpzP/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36768875/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml.08_34_37.FSXgMpzP/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>RetinaNet</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.6</sub></sup></td>
<td align="right"><sup><sub>1.625</sub></sup></td>
<td align="right"><sup><sub>81.3</sub></sup></td>
<td align="right"><sup><sub>0.339</sub></sup></td>
<td align="right"><sup><sub>39.2</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36768907</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36768907/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml.08_35_40.pF3nzPpu/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36768907/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml.08_35_40.pF3nzPpu/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>RetinaNet</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.7</sub></sup></td>
<td align="right"><sup><sub>1.343</sub></sup></td>
<td align="right"><sup><sub>33.6</sub></sup></td>
<td align="right"><sup><sub>0.277</sub></sup></td>
<td align="right"><sup><sub>39.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36769563</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36769563/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml.08_42_05.06JTK6vJ/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36769563/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml.08_42_05.06JTK6vJ/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>RetinaNet</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.7</sub></sup></td>
<td align="right"><sup><sub>1.340</sub></sup></td>
<td align="right"><sup><sub>67.0</sub></sup></td>
<td align="right"><sup><sub>0.276</sub></sup></td>
<td align="right"><sup><sub>38.6</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>36769641</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36769641/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml.08_42_55.sUPnwXI5/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36769641/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml.08_42_55.sUPnwXI5/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json">boxes</a></sub></sup></td>
</tr>
<!-- END RETINANET TABLE -->
</tbody></table>

**Notes:** none

### Mask R-CNN with Bells & Whistles

<table><tbody>
<!-- START BELLS TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>kp<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>prop.<br/>AR</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>X-152-32x8d-FPN-IN5k</sub></sup></td>
<td align="left"><sup><sub>Mask</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>1</sub></sup></td>
<td align="right"><sup><sub>9.6</sub></sup></td>
<td align="right"><sup><sub>1.188</sub></sup></td>
<td align="right"><sup><sub>85.8</sub></sup></td>
<td align="right"><sup><sub>12.100&nbsp;+&nbsp;0.046</sub></sup></td>
<td align="right"><sup><sub>48.1</sub></sup></td>
<td align="right"><sup><sub>41.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37129812</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37129812/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml.09_35_36.8pzTQKYK/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37129812/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml.09_35_36.8pzTQKYK/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37129812/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml.09_35_36.8pzTQKYK/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>[above without test-time aug.]</sub></sup></td>
<td align="right"><sup><sub></sub></sup></td>
<td align="right"><sup><sub></sub></sup></td>
<td align="right"><sup><sub></sub></sup></td>
<td align="right"><sup><sub></sub></sup></td>
<td align="right"><sup><sub></sub></sup></td>
<td align="right"><sup><sub></sub></sup></td>
<td align="right"><sup><sub>0.325&nbsp;+&nbsp;0.018</sub></sup></td>
<td align="right"><sup><sub>45.2</sub></sup></td>
<td align="right"><sup><sub>39.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub></sub></sup></td>
<td align="right"><sup><sub></sub></sup></td>
</tr>
<!-- END BELLS TABLE -->
</tbody></table>

**Notes:**

- A deeper backbone architecture is used: ResNeXt-**152**-32x8d-FPN
- The backbone ResNeXt-152-32x8d model was trained on ImageNet-**5k** (not the usual ImageNet-1k)
- Training uses multi-scale jitter over scales {640, 672, 704, 736, 768, 800}
- Row 1: test-time augmentations are multi-scale testing over {400, 500, 600, 700, 900, 1000, 1100, 1200} and horizontal flipping (on each scale)
- Row 2: same model as row 1, but without any test-time augmentation (i.e., same as the common baseline configuration)
- Like the other results, this is a single model result (it is not an ensemble of models)

## Keypoint Detection Baselines

#### Common Settings for Keypoint Detection Baselines (That Differ from Boxes and Masks)

Our keypoint detection baselines differ from our box and mask baselines in a few details:

- Due to less training data for the keypoint detection task compared with boxes and masks, we enable multi-scale jitter during training for all keypoint detection models. (Testing is still without any test-time augmentations by default.)
- Models are trained only on images from `coco_2014_train` union `coco_2014_valminusminival` that contain at least one person with keypoint annotations (all other images are discarded from the training set).
- Metrics are reported for the person class only (still run on the entire `coco_2014_minival` dataset).

### Person-Specific RPN Baselines

<table><tbody>
<!-- START PERSON-ONLY RPN TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask AP</sub></sup></th>
<th valign="bottom"><sup><sub>kp AP</sub></sup></th>
<th valign="bottom"><sup><sub>prop. AR</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>6.4</sub></sup></td>
<td align="right"><sup><sub>0.391</sub></sup></td>
<td align="right"><sup><sub>9.8</sub></sup></td>
<td align="right"><sup><sub>0.082</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>64.0</sub></sup></td>
<td align="right"><sup><sub>35998996</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.1</sub></sup></td>
<td align="right"><sup><sub>0.504</sub></sup></td>
<td align="right"><sup><sub>12.6</sub></sup></td>
<td align="right"><sup><sub>0.109</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>65.2</sub></sup></td>
<td align="right"><sup><sub>35999521</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>11.5</sub></sup></td>
<td align="right"><sup><sub>1.394</sub></sup></td>
<td align="right"><sup><sub>34.9</sub></sup></td>
<td align="right"><sup><sub>0.289</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>65.9</sub></sup></td>
<td align="right"><sup><sub>35999553</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>RPN</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>11.6</sub></sup></td>
<td align="right"><sup><sub>1.104</sub></sup></td>
<td align="right"><sup><sub>27.6</sub></sup></td>
<td align="right"><sup><sub>0.224</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>66.2</sub></sup></td>
<td align="right"><sup><sub>36760438</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;props:&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl">1</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl">2</a>,&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl">3</a></sub></sup></td>
</tr>
<!-- END PERSON-ONLY RPN TABLE -->
</tbody></table>

**Notes:**

- *Metrics are for the person category only.*
- Inference time only includes RPN proposal generation.
- "prop. AR" is proposal average recall at 1000 proposals per image.
- Proposal download links ("props"): "1" is `coco_2014_train`; "2" is `coco_2014_valminusminival`; and "3" is `coco_2014_minival`. These include all images, not just the ones with valid keypoint annotations.

### Keypoint-Only Mask R-CNN Baselines Using Precomputed RPN Proposals

<table><tbody>
<!-- START 2-STAGE KEYPOINTS TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask AP</sub></sup></th>
<th valign="bottom"><sup><sub>kp AP</sub></sup></th>
<th valign="bottom"><sup><sub>prop. AR</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>7.7</sub></sup></td>
<td align="right"><sup><sub>0.533</sub></sup></td>
<td align="right"><sup><sub>13.3</sub></sup></td>
<td align="right"><sup><sub>0.081&nbsp;+&nbsp;0.087</sub></sup></td>
<td align="right"><sup><sub>52.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>64.1</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37651787</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37651787/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml.20_00_48.UiwJsTXB/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37651787/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml.20_00_48.UiwJsTXB/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37651787/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml.20_00_48.UiwJsTXB/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>7.7</sub></sup></td>
<td align="right"><sup><sub>0.533</sub></sup></td>
<td align="right"><sup><sub>19.2</sub></sup></td>
<td align="right"><sup><sub>0.080&nbsp;+&nbsp;0.085</sub></sup></td>
<td align="right"><sup><sub>53.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>65.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37651887</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>9.4</sub></sup></td>
<td align="right"><sup><sub>0.668</sub></sup></td>
<td align="right"><sup><sub>16.7</sub></sup></td>
<td align="right"><sup><sub>0.109&nbsp;+&nbsp;0.080</sub></sup></td>
<td align="right"><sup><sub>53.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>65.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37651996</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37651996/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml.20_02_37.eVXnKM2Q/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37651996/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml.20_02_37.eVXnKM2Q/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37651996/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml.20_02_37.eVXnKM2Q/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>9.4</sub></sup></td>
<td align="right"><sup><sub>0.668</sub></sup></td>
<td align="right"><sup><sub>24.1</sub></sup></td>
<td align="right"><sup><sub>0.108&nbsp;+&nbsp;0.076</sub></sup></td>
<td align="right"><sup><sub>54.6</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>66.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37652016</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37652016/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml.20_03_32.z86wT97d/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37652016/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml.20_03_32.z86wT97d/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37652016/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml.20_03_32.z86wT97d/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.8</sub></sup></td>
<td align="right"><sup><sub>1.477</sub></sup></td>
<td align="right"><sup><sub>36.9</sub></sup></td>
<td align="right"><sup><sub>0.288&nbsp;+&nbsp;0.077</sub></sup></td>
<td align="right"><sup><sub>55.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>66.7</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37731079</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37731079/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_40_56.wj7Hg7lX/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37731079/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_40_56.wj7Hg7lX/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37731079/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_40_56.wj7Hg7lX/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.9</sub></sup></td>
<td align="right"><sup><sub>1.478</sub></sup></td>
<td align="right"><sup><sub>53.4</sub></sup></td>
<td align="right"><sup><sub>0.286&nbsp;+&nbsp;0.075</sub></sup></td>
<td align="right"><sup><sub>56.3</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>67.1</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37731142</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37731142/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_41_54.e1sD4Frh/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37731142/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_41_54.e1sD4Frh/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37731142/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_41_54.e1sD4Frh/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.9</sub></sup></td>
<td align="right"><sup><sub>1.215</sub></sup></td>
<td align="right"><sup><sub>30.4</sub></sup></td>
<td align="right"><sup><sub>0.219&nbsp;+&nbsp;0.084</sub></sup></td>
<td align="right"><sup><sub>55.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>66.2</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37730253</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37730253/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_34_24.3G9OcQuR/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37730253/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_34_24.3G9OcQuR/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37730253/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_34_24.3G9OcQuR/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.9</sub></sup></td>
<td align="right"><sup><sub>1.214</sub></sup></td>
<td align="right"><sup><sub>43.8</sub></sup></td>
<td align="right"><sup><sub>0.218&nbsp;+&nbsp;0.071</sub></sup></td>
<td align="right"><sup><sub>55.9</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>67.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37731010</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37731010/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_39_51.xt1oMzRk/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37731010/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_39_51.xt1oMzRk/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37731010/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_39_51.xt1oMzRk/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<!-- END 2-STAGE KEYPOINTS TABLE -->
</tbody></table>

**Notes:**

- *Metrics are for the person category only.*
- Each row uses precomputed RPN proposals from the corresponding table row above that uses the same backbone.
- Inference time *excludes* proposal generation.


### End-to-End Keypoint-Only Mask R-CNN Baselines

<table><tbody>
<!-- START END-TO-END KEYPOINTS TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask AP</sub></sup></th>
<th valign="bottom"><sup><sub>kp AP</sub></sup></th>
<th valign="bottom"><sup><sub>prop. AR</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>9.0</sub></sup></td>
<td align="right"><sup><sub>0.832</sub></sup></td>
<td align="right"><sup><sub>20.8</sub></sup></td>
<td align="right"><sup><sub>0.097&nbsp;+&nbsp;0.092</sub></sup></td>
<td align="right"><sup><sub>53.6</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>64.2</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37697547</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-50-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>9.0</sub></sup></td>
<td align="right"><sup><sub>0.828</sub></sup></td>
<td align="right"><sup><sub>29.9</sub></sup></td>
<td align="right"><sup><sub>0.096&nbsp;+&nbsp;0.089</sub></sup></td>
<td align="right"><sup><sub>54.3</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>65.4</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37697714</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37697714/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml.08_44_03.qrQ0ph6M/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37697714/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml.08_44_03.qrQ0ph6M/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37697714/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml.08_44_03.qrQ0ph6M/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>10.6</sub></sup></td>
<td align="right"><sup><sub>0.923</sub></sup></td>
<td align="right"><sup><sub>23.1</sub></sup></td>
<td align="right"><sup><sub>0.124&nbsp;+&nbsp;0.084</sub></sup></td>
<td align="right"><sup><sub>54.5</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>64.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37697946</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37697946/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml.08_45_06.Y14KqbST/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37697946/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml.08_45_06.Y14KqbST/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37697946/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml.08_45_06.Y14KqbST/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>10.6</sub></sup></td>
<td align="right"><sup><sub>0.921</sub></sup></td>
<td align="right"><sup><sub>33.3</sub></sup></td>
<td align="right"><sup><sub>0.123&nbsp;+&nbsp;0.083</sub></sup></td>
<td align="right"><sup><sub>55.3</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>65.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37698009</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>14.1</sub></sup></td>
<td align="right"><sup><sub>1.655</sub></sup></td>
<td align="right"><sup><sub>41.4</sub></sup></td>
<td align="right"><sup><sub>0.302&nbsp;+&nbsp;0.079</sub></sup></td>
<td align="right"><sup><sub>56.3</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>66.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37732355</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37732355/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_56_16.yv4t4W8N/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37732355/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_56_16.yv4t4W8N/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37732355/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_56_16.yv4t4W8N/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-64x4d-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>14.1</sub></sup></td>
<td align="right"><sup><sub>1.731</sub></sup></td>
<td align="right"><sup><sub>62.5</sub></sup></td>
<td align="right"><sup><sub>0.322&nbsp;+&nbsp;0.074</sub></sup></td>
<td align="right"><sup><sub>56.9</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>66.8</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37732415</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37732415/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_57_48.Spqtq3Sf/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37732415/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_57_48.Spqtq3Sf/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37732415/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_57_48.Spqtq3Sf/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>14.2</sub></sup></td>
<td align="right"><sup><sub>1.410</sub></sup></td>
<td align="right"><sup><sub>35.3</sub></sup></td>
<td align="right"><sup><sub>0.235&nbsp;+&nbsp;0.080</sub></sup></td>
<td align="right"><sup><sub>56.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>66.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37792158</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37792158/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_54_16.LgZeo40k/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37792158/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_54_16.LgZeo40k/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37792158/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_54_16.LgZeo40k/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>X-101-32x8d-FPN</sub></sup></td>
<td align="left"><sup><sub>Kps</sub></sup></td>
<td align="left"><sup><sub>s1x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>14.2</sub></sup></td>
<td align="right"><sup><sub>1.408</sub></sup></td>
<td align="right"><sup><sub>50.8</sub></sup></td>
<td align="right"><sup><sub>0.236&nbsp;+&nbsp;0.075</sub></sup></td>
<td align="right"><sup><sub>56.9</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>67.0</sub></sup></td>
<td align="right"><sup><sub>-</sub></sup></td>
<td align="right"><sup><sub>37732318</sub></sup></td>
<td align="left"><sup><sub><a href="https://dl.fbaipublicfiles.com/detectron/37732318/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_55_09.Lx8H5JVu/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37732318/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_55_09.Lx8H5JVu/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json">boxes</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron/37732318/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_55_09.Lx8H5JVu/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json">kps</a></sub></sup></td>
</tr>
<!-- END END-TO-END KEYPOINTS TABLE -->
</tbody></table>

**Notes:**

- *Metrics are for the person category only.*
- For these models, RPN and the detector are trained jointly and end-to-end.
- Inference time is fully image-to-detections, *including* proposal generation.


================================================
FILE: Makefile
================================================
# Don't use the --user flag for setup.py develop mode inside a virtual
# environment: user site-packages are not visible there and the install fails.
# Two flavours are detected:
#   - legacy `virtualenv`, which sets sys.real_prefix
#   - the stdlib `venv` module (and virtualenv >= 20), where sys.base_prefix
#     differs from sys.prefix
# getattr() with a default keeps the check working on Python 2, which has no
# sys.base_prefix attribute.
DEV_USER_FLAG=$(shell python -c "import sys; in_venv = hasattr(sys, 'real_prefix') or getattr(sys, 'base_prefix', sys.prefix) != sys.prefix; print('' if in_venv else '--user')")

# `make` with no target behaves like `make dev`.
.PHONY: default
default: dev

# Regular (non-editable) installation.
.PHONY: install
install:
	python setup.py install

# Out-of-tree CMake build of the custom operators, using all available cores.
.PHONY: ops
ops:
	mkdir -p build && cd build && cmake .. && make -j$(shell nproc)

# Editable ("develop") installation; per-user unless inside a virtual env.
.PHONY: dev
dev:
	python setup.py develop $(DEV_USER_FLAG)

# Undo the develop installation and remove the CMake build directory.
.PHONY: clean
clean:
	python setup.py develop --uninstall $(DEV_USER_FLAG)
	rm -rf build


================================================
FILE: NOTICE
================================================
Portions of this software are derived from py-faster-rcnn.

==============================================================================
py-faster-rcnn licence
==============================================================================

Faster R-CNN

The MIT License (MIT)

Copyright (c) 2015 Microsoft Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


================================================
FILE: README.md
================================================
**Detectron is deprecated. Please see [detectron2](https://github.com/facebookresearch/detectron2), a ground-up rewrite of Detectron in PyTorch.**

# Detectron

Detectron is Facebook AI Research's software system that implements state-of-the-art object detection algorithms, including [Mask R-CNN](https://arxiv.org/abs/1703.06870). It is written in Python and powered by the [Caffe2](https://github.com/caffe2/caffe2) deep learning framework.

At FAIR, Detectron has enabled numerous research projects, including: [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144), [Mask R-CNN](https://arxiv.org/abs/1703.06870), [Detecting and Recognizing Human-Object Interactions](https://arxiv.org/abs/1704.07333), [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002), [Non-local Neural Networks](https://arxiv.org/abs/1711.07971), [Learning to Segment Every Thing](https://arxiv.org/abs/1711.10370), [Data Distillation: Towards Omni-Supervised Learning](https://arxiv.org/abs/1712.04440), [DensePose: Dense Human Pose Estimation In The Wild](https://arxiv.org/abs/1802.00434), and [Group Normalization](https://arxiv.org/abs/1803.08494).

<div align="center">
  <img src="demo/output/33823288584_1d21cf0a26_k_example_output.jpg" width="700px" />
  <p>Example Mask R-CNN output.</p>
</div>

## Introduction

The goal of Detectron is to provide a high-quality, high-performance
codebase for object detection *research*. It is designed to be flexible in order
to support rapid implementation and evaluation of novel research. Detectron
includes implementations of the following object detection algorithms:

- [Mask R-CNN](https://arxiv.org/abs/1703.06870) -- *Marr Prize at ICCV 2017*
- [RetinaNet](https://arxiv.org/abs/1708.02002) -- *Best Student Paper Award at ICCV 2017*
- [Faster R-CNN](https://arxiv.org/abs/1506.01497)
- [RPN](https://arxiv.org/abs/1506.01497)
- [Fast R-CNN](https://arxiv.org/abs/1504.08083)
- [R-FCN](https://arxiv.org/abs/1605.06409)

using the following backbone network architectures:

- [ResNeXt{50,101,152}](https://arxiv.org/abs/1611.05431)
- [ResNet{50,101,152}](https://arxiv.org/abs/1512.03385)
- [Feature Pyramid Networks](https://arxiv.org/abs/1612.03144) (with ResNet/ResNeXt)
- [VGG16](https://arxiv.org/abs/1409.1556)

Additional backbone architectures may be easily implemented. For more details about these models, please see [References](#references) below.

## Update

- 4/2018: Support Group Normalization - see [`GN/README.md`](./projects/GN/README.md)

## License

Detectron is released under the [Apache 2.0 license](LICENSE). See the [NOTICE](NOTICE) file for additional details.

## Citing Detectron

If you use Detectron in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry.

```
@misc{Detectron2018,
  author =       {Ross Girshick and Ilija Radosavovic and Georgia Gkioxari and
                  Piotr Doll\'{a}r and Kaiming He},
  title =        {Detectron},
  howpublished = {\url{https://github.com/facebookresearch/detectron}},
  year =         {2018}
}
```

## Model Zoo and Baselines

We provide a large set of baseline results and trained models available for download in the [Detectron Model Zoo](MODEL_ZOO.md).

## Installation

Please find installation instructions for Caffe2 and Detectron in [`INSTALL.md`](INSTALL.md).

## Quick Start: Using Detectron

After installation, please see [`GETTING_STARTED.md`](GETTING_STARTED.md) for brief tutorials covering inference and training with Detectron.

## Getting Help

To start, please check the [troubleshooting](INSTALL.md#troubleshooting) section of our installation instructions as well as our [FAQ](FAQ.md). If you couldn't find help there, try searching our GitHub issues. We intend the issues page to be a forum in which the community collectively troubleshoots problems.

If bugs are found, **we appreciate pull requests** (including adding Q&A's to `FAQ.md` and improving our installation instructions and troubleshooting documents). Please see [CONTRIBUTING.md](CONTRIBUTING.md) for more information about contributing to Detectron.

## References

- [Data Distillation: Towards Omni-Supervised Learning](https://arxiv.org/abs/1712.04440).
  Ilija Radosavovic, Piotr Dollár, Ross Girshick, Georgia Gkioxari, and Kaiming He.
  Tech report, arXiv, Dec. 2017.
- [Learning to Segment Every Thing](https://arxiv.org/abs/1711.10370).
  Ronghang Hu, Piotr Dollár, Kaiming He, Trevor Darrell, and Ross Girshick.
  Tech report, arXiv, Nov. 2017.
- [Non-Local Neural Networks](https://arxiv.org/abs/1711.07971).
  Xiaolong Wang, Ross Girshick, Abhinav Gupta, and Kaiming He.
  Tech report, arXiv, Nov. 2017.
- [Mask R-CNN](https://arxiv.org/abs/1703.06870).
  Kaiming He, Georgia Gkioxari, Piotr Dollár, and Ross Girshick.
  IEEE International Conference on Computer Vision (ICCV), 2017.
- [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002).
  Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, and Piotr Dollár.
  IEEE International Conference on Computer Vision (ICCV), 2017.
- [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677).
  Priya Goyal, Piotr Dollár, Ross Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, and Kaiming He.
  Tech report, arXiv, June 2017.
- [Detecting and Recognizing Human-Object Interactions](https://arxiv.org/abs/1704.07333).
  Georgia Gkioxari, Ross Girshick, Piotr Dollár, and Kaiming He.
  Tech report, arXiv, Apr. 2017.
- [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144).
  Tsung-Yi Lin, Piotr Dollár, Ross Girshick, Kaiming He, Bharath Hariharan, and Serge Belongie.
  IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017.
- [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431).
  Saining Xie, Ross Girshick, Piotr Dollár, Zhuowen Tu, and Kaiming He.
  IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017.
- [R-FCN: Object Detection via Region-based Fully Convolutional Networks](http://arxiv.org/abs/1605.06409).
  Jifeng Dai, Yi Li, Kaiming He, and Jian Sun.
  Conference on Neural Information Processing Systems (NIPS), 2016.
- [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385).
  Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun.
  IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016.
- [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](http://arxiv.org/abs/1506.01497).
  Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
  Conference on Neural Information Processing Systems (NIPS), 2015.
- [Fast R-CNN](http://arxiv.org/abs/1504.08083).
  Ross Girshick.
  IEEE International Conference on Computer Vision (ICCV), 2015.


================================================
FILE: cmake/Summary.cmake
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake

# Reports the detected toolchain and Caffe2/CUDA configuration at STATUS level.
# Takes no arguments; reads only variables already set by the caller's scope.
function (detectron_print_config_summary)
  message(STATUS "Summary:")

  # Build tool and host compiler.
  message(STATUS "  CMake version        : ${CMAKE_VERSION}")
  message(STATUS "  CMake command        : ${CMAKE_COMMAND}")
  message(STATUS "  System name          : ${CMAKE_SYSTEM_NAME}")
  message(STATUS "  C++ compiler         : ${CMAKE_CXX_COMPILER}")
  message(STATUS "  C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
  message(STATUS "  CXX flags            : ${CMAKE_CXX_FLAGS}")

  # Caffe2 installation that was located.
  message(STATUS "  Caffe2 version       : ${CAFFE2_VERSION}")
  message(STATUS "  Caffe2 include path  : ${CAFFE2_INCLUDE_DIRS}")

  # Either flag being truthy counts as "Caffe2 was built with CUDA".
  if (NOT (CAFFE2_USE_CUDA OR CAFFE2_FOUND_CUDA))
    message(STATUS "  Caffe2 found CUDA    : False")
  else()
    message(STATUS "  Caffe2 found CUDA    : True")
    message(STATUS "    CUDA version       : ${CUDA_VERSION}")
    message(STATUS "    CuDNN version      : ${CUDNN_VERSION}")
  endif()
endfunction()


================================================
FILE: cmake/legacy/Cuda.cmake
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Cuda.cmake

# Caffe2 cmake utility to prepare for cuda build.
# This cmake file is called from Dependencies.cmake. You do not need to
# manually invoke it.

# Known NVIDIA GPU architectures Caffe2 can be compiled for.
# Default is set to cuda 9. If we detect the cuda version to be less than
# 9, we will lower it to the corresponding known archs.
set(Caffe2_known_gpu_archs "30 35 50 52 60 61 70") # for CUDA 9.x
set(Caffe2_known_gpu_archs8 "20 21(20) 30 35 50 52 60 61") # for CUDA 8.x
set(Caffe2_known_gpu_archs7 "20 21(20) 30 35 50 52") # for CUDA 7.x


################################################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
# Usage:
#   caffe2_select_nvcc_arch_flags(out_variable)
function(caffe2_select_nvcc_arch_flags out_variable)
  # Translates the CUDA_ARCH_NAME cache setting into nvcc "-gencode" flags.
  #
  # Outputs (set in the caller's scope):
  #   ${out_variable}           - list of -gencode arguments for nvcc
  #   ${out_variable}_readable  - space-separated string of the selected
  #                               sm_XX / compute_XX targets, for logging
  #
  # Reads Caffe2_known_gpu_archs from the enclosing scope when the "All"
  # preset is selected.

  # List of arch names
  set(__archs_names "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual")
  set(__archs_name_default "All")

  # Set CUDA_ARCH_NAME strings (so it will be seen as a drop-down in the CMake GUI)
  set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU architecture")
  set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names})
  mark_as_advanced(CUDA_ARCH_NAME)

  # Verify CUDA_ARCH_NAME value.
  # The list is wrapped in ';' on both sides so that a name only matches when
  # it appears as a whole list element. Note that the right-hand side is
  # interpreted as a regular expression.
  if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
    string(REPLACE ";" ", " __archs_names "${__archs_names}")
    message(FATAL_ERROR "Invalid CUDA_ARCH_NAME, supported values: ${__archs_names}. Got ${CUDA_ARCH_NAME}")
  endif()

  # In "Manual" mode expose the two cache entries the user fills in; in every
  # other mode remove them from the cache so stale values are not kept around.
  if(${CUDA_ARCH_NAME} STREQUAL "Manual")
    set(CUDA_ARCH_BIN "" CACHE STRING
      "Specify GPU architectures to build binaries for (BIN(PTX) format is supported)")
    set(CUDA_ARCH_PTX "" CACHE STRING
      "Specify GPU architectures to build PTX intermediate code for")
    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  # Map the preset name to the list of binary architectures. Only the
  # "Manual" branch assigns __cuda_arch_ptx here.
  if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
    set(__cuda_arch_bin "30 35")
  elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
    set(__cuda_arch_bin "50")
  elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
    set(__cuda_arch_bin "60 61")
  elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
    set(__cuda_arch_bin "70")
  elseif(${CUDA_ARCH_NAME} STREQUAL "All")
    set(__cuda_arch_bin ${Caffe2_known_gpu_archs})
  elseif(${CUDA_ARCH_NAME} STREQUAL "Manual")
    set(__cuda_arch_bin ${CUDA_ARCH_BIN})
    set(__cuda_arch_ptx ${CUDA_ARCH_PTX})
  else()
    message(FATAL_ERROR "Invalid CUDA_ARCH_NAME")
  endif()

  # Remove dots and convert to lists
  # ("6.1" becomes "61"; MATCHALL then splits the string into list elements.
  # The BIN pattern also accepts parentheses so "21(20)" stays one element.)
  string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${__cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+"   __cuda_arch_ptx "${__cuda_arch_ptx}")
  list(REMOVE_DUPLICATES __cuda_arch_bin)
  list(REMOVE_DUPLICATES __cuda_arch_ptx)

  set(__nvcc_flags "")
  set(__nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(__arch ${__cuda_arch_bin})
    if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      # (format BIN(PTX): CMAKE_MATCH_1 is the sm_ target, CMAKE_MATCH_2 the
      # compute_ architecture it is generated from)
      list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch})
      list(APPEND __nvcc_archs_readable sm_${__arch})
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(__arch ${__cuda_arch_ptx})
    list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch})
    list(APPEND __nvcc_archs_readable compute_${__arch})
  endforeach()

  # The readable form is flattened to a single space-separated string; the
  # flag list itself is returned as a regular CMake list.
  string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}")
  set(${out_variable}          ${__nvcc_flags}          PARENT_SCOPE)
  set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE)
endfunction()


################################################################################################
# Short command for cuda compilation
# Usage:
#   caffe2_cuda_compile(<objlist_variable> <cuda_files>)
macro(caffe2_cuda_compile objlist_variable)
  # Wraps cuda_compile(): temporarily strips /EHa from the host C++ flags,
  # compiles the sources passed after objlist_variable (${ARGN}), restores the
  # flags, and stores the resulting object list in ${objlist_variable}.
  # Because this is a macro, every variable touched here lives in the
  # caller's scope.

  # Save the current flags so they can be restored after compilation.
  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
    set(${var}_backup_in_cuda_compile_ "${${var}}")

    # we remove /EHa as it generates warnings under windows
    string(REPLACE "/EHa" "" ${var} "${${var}}")

  endforeach()

  # NOTE: this append is not undone below, so the flag stays in
  # CUDA_NVCC_FLAGS after the macro returns (and is appended again on every
  # call).
  if(APPLE)
    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function)
  endif()

  # cuda_compile is presumably the macro provided by CMake's FindCUDA module
  # (it is not defined in this file).
  cuda_compile(cuda_objcs ${ARGN})

  # Restore the original host flags and drop the temporary backups.
  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
    set(${var} "${${var}_backup_in_cuda_compile_}")
    unset(${var}_backup_in_cuda_compile_)
  endforeach()

  set(${objlist_variable} ${cuda_objcs})
endmacro()

################################################################################################
###  Non macro section
################################################################################################

# Special care for windows platform: we know that 32-bit windows does not support cuda.
if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
  if(NOT (CMAKE_SIZEOF_VOID_P EQUAL 8))
    message(FATAL_ERROR
            "CUDA support not available with 32-bit windows. Did you "
            "forget to set Win64 in the generator target?")
    # Not reached in practice: FATAL_ERROR already stops processing.
    return()
  endif()
endif()

# QUIET: a missing CUDA toolkit is not an error here; it is reported to the
# including file through HAVE_CUDA below.
find_package(CUDA 7.0 QUIET)
find_cuda_helper_libs(curand)  # cmake 2.8.7 compatibility which doesn't search for curand

# Without CUDA, stop processing this file and hand control back to the
# file that included it.
if(NOT CUDA_FOUND)
  set(HAVE_CUDA FALSE)
  return()
endif()

set(HAVE_CUDA TRUE)
message(STATUS "CUDA detected: " ${CUDA_VERSION})
# Pick the architecture list matching the toolkit version; the CUDA 9 list
# set at the top of the file is kept when neither branch below applies.
if (${CUDA_VERSION} LESS 7.0)
  message(FATAL_ERROR "Caffe2 requires CUDA 7.0 or later version")
elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x
  set(Caffe2_known_gpu_archs ${Caffe2_known_gpu_archs7})
  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
  set(Caffe2_known_gpu_archs ${Caffe2_known_gpu_archs8})
  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
  # CUDA 8 may complain that sm_20 is no longer supported. Suppress the
  # warning for now.
  list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
endif()

# NOTE(review): caffe2_include_directories is not defined in this file; it is
# presumably provided by the including project - confirm.
caffe2_include_directories(${CUDA_INCLUDE_DIRS})
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_CUDART_LIBRARY}
                              ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})

# find libcuda.so and libnvrtc.so
# For libcuda.so, we will find it under lib, lib64, and then the
# stubs folder, in case we are building on a system that does not
# have cuda driver installed. On windows, we also search under the
# folder lib/x64.

find_library(CUDA_CUDA_LIB cuda
    PATHS ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES lib lib64 lib/stubs lib64/stubs lib/x64)
find_library(CUDA_NVRTC_LIB nvrtc
    PATHS ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES lib lib64 lib/x64)

# setting nvcc arch flags
# (must run after the version checks above so that the "All" preset sees the
# architecture list adjusted for the detected CUDA version)
caffe2_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")

# Both the driver library and NVRTC are mandatory; configuration aborts if
# either one is missing.
if(CUDA_CUDA_LIB)
    message(STATUS "Found libcuda: ${CUDA_CUDA_LIB}")
    list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_CUDA_LIB})
else()
    message(FATAL_ERROR "Cannot find libcuda.so. Please file an issue on https://github.com/caffe2/caffe2 with your build output.")
endif()
if(CUDA_NVRTC_LIB)
  message(STATUS "Found libnvrtc: ${CUDA_NVRTC_LIB}")
  list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_NVRTC_LIB})
else()
    message(FATAL_ERROR "Cannot find libnvrtc.so. Please file an issue on https://github.com/caffe2/caffe2 with your build output.")
endif()

# disable some nvcc diagnostics that appear in boost, glog, gflags, opencv, etc.
foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used)
  list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag})
endforeach()

# Set C++14 support
# Host flags are not propagated to nvcc, so the language standard and -fPIC
# are passed explicitly.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if (NOT MSVC)
  list(APPEND CUDA_NVCC_FLAGS "-std=c++14")
  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
endif()

# Debug and Release symbol support
#
# Selects the MSVC runtime library flag passed through nvcc to the host
# compiler: -MD (shared libs) or -MT (static libs) for Release builds.
#
# The variables are referenced by name rather than as ${VAR}: with an empty or
# undefined CMAKE_BUILD_TYPE (the default for multi-config generators), the
# expanded form would produce the malformed condition `if( MATCHES "Release")`
# and abort with a syntax error instead of reaching the intended
# "Unknown cmake build type" message below.
if (MSVC)
  if (CMAKE_BUILD_TYPE MATCHES "Release")
    if (BUILD_SHARED_LIBS)
      list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MD")
    else()
      list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MT")
    endif()
  elseif(CMAKE_BUILD_TYPE MATCHES "Debug")
    message(FATAL_ERROR
            "Caffe2 currently does not support the combination of MSVC, Cuda "
            "and Debug mode. Either set USE_CUDA=OFF or set the build type "
            "to Release")
    # Not reached while the FATAL_ERROR above is in place; kept so the debug
    # runtime flags are already wired up once Debug builds are supported.
    if (BUILD_SHARED_LIBS)
      list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MDd")
    else()
      list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MTd")
    endif()
  else()
    message(FATAL_ERROR "Unknown cmake build type: " ${CMAKE_BUILD_TYPE})
  endif()
endif()


# Forward the host compiler's OpenMP flags through nvcc when OpenMP was found.
if(OpenMP_FOUND)
  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler ${OpenMP_CXX_FLAGS}")
endif()

# Set --expt-relaxed-constexpr to suppress Eigen warnings
list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

# Hide rarely-used CUDA cache entries from the default CMake GUI view.
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)


================================================
FILE: cmake/legacy/Dependencies.cmake
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Dependencies.cmake

# Locate CUDA. The included file sets HAVE_CUDA.
include(cmake/legacy/Cuda.cmake)

if (HAVE_CUDA)
  # Host-compiler sanity checks. They only fire when nvcc is going to use the
  # same GCC that CMake selected as the C compiler.

  if (NOT CUDA_VERSION VERSION_LESS 9.0 AND CUDA_VERSION VERSION_LESS 10.0)
    # CUDA 9.x (9.0 <= version < 10.0) requires GCC version <= 6
    if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND
        CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER AND
        NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 7.0)
      message(FATAL_ERROR
        "CUDA ${CUDA_VERSION} is not compatible with GCC version >= 7. "
        "Use the following option to use another version (for example): \n"
        "  -DCUDA_HOST_COMPILER=/usr/bin/gcc-6\n")
    endif()
  elseif (CUDA_VERSION VERSION_EQUAL 8.0)
    # CUDA 8.0 requires GCC version <= 5
    if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND
        CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER AND
        NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 6.0)
      message(FATAL_ERROR
        "CUDA 8.0 is not compatible with GCC version >= 6. "
        "Use the following option to use another version (for example): \n"
        "  -DCUDA_HOST_COMPILER=/usr/bin/gcc-5\n")
    endif()
  endif()

  # Locate cuDNN; it is mandatory whenever CUDA is available.
  find_package(CuDNN REQUIRED)
  if (CUDNN_FOUND)
    caffe2_include_directories(${CUDNN_INCLUDE_DIRS})
  endif()
endif()


================================================
FILE: cmake/legacy/Modules/FindCuDNN.cmake
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Modules/FindCuDNN.cmake

# - Try to find cuDNN
#
# The following variables are optionally searched for defaults
#  CUDNN_ROOT_DIR:            Base directory where all cuDNN components are found
#
# The following are set after configuration is done:
#  CUDNN_FOUND
#  CUDNN_INCLUDE_DIRS
#  CUDNN_LIBRARIES
#  CUDNN_LIBRARY_DIRS

include(FindPackageHandleStandardArgs)

# User-settable hint: root of a cuDNN installation.
set(CUDNN_ROOT_DIR "" CACHE PATH "Folder contains NVIDIA cuDNN")

# Look for the header under the cuDNN root first, then under the CUDA toolkit.
find_path(CUDNN_INCLUDE_DIR cudnn.h
    HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES cuda/include include)

# Same search order for the library; lib/x64 covers the Windows layout.
find_library(CUDNN_LIBRARY cudnn
    HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)

# Sets CUDNN_FOUND when both the include directory and the library were
# located, and prints the standard found / not-found message.
find_package_handle_standard_args(
    CUDNN DEFAULT_MSG CUDNN_INCLUDE_DIR CUDNN_LIBRARY)

if(CUDNN_FOUND)
  # Get the cuDNN version.
  # Starting with cuDNN 8 the CUDNN_MAJOR / CUDNN_MINOR / CUDNN_PATCHLEVEL
  # macros live in cudnn_version.h; older releases define them directly in
  # cudnn.h. Prefer the dedicated header when it exists so the version is not
  # reported as "?" for newer releases.
  if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
    file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version.h" CUDNN_HEADER_CONTENTS)
  else()
    file(READ "${CUDNN_INCLUDE_DIR}/cudnn.h" CUDNN_HEADER_CONTENTS)
  endif()

  # For each component: first isolate the "#define ..." line, then reduce it
  # to the captured number. A component that is not found ends up empty.
  string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
         CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}")
  string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
         CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}")
  string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
         CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}")
  string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
         CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}")
  string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
         CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}")
  string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
         CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")

  # Assemble cuDNN version; "?" signals that the header could not be parsed.
  if(NOT CUDNN_VERSION_MAJOR)
    set(CUDNN_VERSION "?")
  else()
    set(CUDNN_VERSION "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}")
  endif()

  # Publish the plural result variables documented in the file header.
  set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
  set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
  message(STATUS "Found cuDNN: v${CUDNN_VERSION}  (include: ${CUDNN_INCLUDE_DIR}, library: ${CUDNN_LIBRARY})")
  mark_as_advanced(CUDNN_ROOT_DIR CUDNN_LIBRARY CUDNN_INCLUDE_DIR)
endif()


================================================
FILE: cmake/legacy/Summary.cmake
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake

# Prints configuration summary.
#
# Reports the toolchain, the located Caffe2 installation and, when HAVE_CUDA
# is true, the CUDA and cuDNN versions. Takes no arguments and only reads
# variables already set in the caller's scope.
function (detectron_print_config_summary)
  message(STATUS "Summary:")
  message(STATUS "  CMake version        : ${CMAKE_VERSION}")
  message(STATUS "  CMake command        : ${CMAKE_COMMAND}")
  message(STATUS "  System name          : ${CMAKE_SYSTEM_NAME}")
  message(STATUS "  C++ compiler         : ${CMAKE_CXX_COMPILER}")
  message(STATUS "  C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
  message(STATUS "  CXX flags            : ${CMAKE_CXX_FLAGS}")
  message(STATUS "  Caffe2 version       : ${CAFFE2_VERSION}")
  message(STATUS "  Caffe2 include path  : ${CAFFE2_INCLUDE_DIRS}")
  message(STATUS "  Have CUDA            : ${HAVE_CUDA}")
  # Test the variable by name: if(${HAVE_CUDA}) would expand the value first
  # and then evaluate it a second time as a variable name or constant.
  if (HAVE_CUDA)
    message(STATUS "    CUDA version       : ${CUDA_VERSION}")
    message(STATUS "    CuDNN version      : ${CUDNN_VERSION}")
  endif()
endfunction()


================================================
FILE: cmake/legacy/Utils.cmake
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Utils.cmake

################################################################################################
# Exclude and prepend functionalities
# exclude(<output_var> <input_list> [<item> ...])
# Stores <input_list> minus every occurrence of each <item> into <output_var>
# in the caller's scope.
function (exclude OUTPUT INPUT)
  # INPUT is this function's own copy of the list, so it is edited in place.
  foreach(unwanted ${ARGN})
    list(REMOVE_ITEM INPUT "${unwanted}")
  endforeach()
  set(${OUTPUT} ${INPUT} PARENT_SCOPE)
endfunction(exclude)

# prepend(<output_var> <prefix> [<item> ...])
# Stores the list of "<prefix><item>" strings, in argument order, into
# <output_var> in the caller's scope.
function (prepend OUTPUT PREPEND)
  set(prefixed "")
  foreach(entry ${ARGN})
    list(APPEND prefixed "${PREPEND}${entry}")
  endforeach()
  set(${OUTPUT} ${prefixed} PARENT_SCOPE)
endfunction(prepend)


################################################################################################
# Unsets every variable named in the argument list.
# Usage:
#   caffe_clear_vars(<variables_list>)
# This is a macro, so unset() acts on the scope of the caller. Only normal
# variables are removed; unset() is called without CACHE, so cache entries
# with the same name are left alone.
macro(caffe_clear_vars)
  foreach(_var ${ARGN})
    unset(${_var})
  endforeach()
endmacro()

################################################################################################
# Emits one STATUS message per argument, i.e. one list element per line.
# Usage:
#   caffe_print_list(<list>)
function(caffe_print_list)
  foreach(entry ${ARGN})
    message(STATUS ${entry})
  endforeach()
endfunction()

################################################################################################
# Reads a set of integer "#define" values from a header file.
# Usage:
#   caffe_parse_header(<file> <file_var> <define1> <define2> ... [PARENT_SCOPE] [CACHE])
#
# <file_var> receives the header lines that match "#define <name> <integer>"
# for any requested name. Each <defineN> variable is then set to the integer
# found for that name, or to "" when no matching line mentions it.
#   CACHE        - store each value as an INTERNAL cache entry (FORCE).
#   PARENT_SCOPE - additionally set each value in the parent scope; ignored
#                  when CACHE is also given.
# When the file is missing or no line matches, <file_var> is unset/empty and
# the cache entry of every <defineN> is removed instead.
#
# NOTE: this is a macro, so the helper variables vars_regex, __parnet_scope
# and __add_cache are created in the caller's scope.
macro(caffe_parse_header FILENAME FILE_VAR)
  set(vars_regex "")
  set(__parnet_scope OFF)
  set(__add_cache OFF)
  # First pass: pick out the option keywords and join the remaining names into
  # a single "name1|name2|..." regex alternation.
  foreach(name ${ARGN})
    if("${name}" STREQUAL "PARENT_SCOPE")
      set(__parnet_scope ON)
    elseif("${name}" STREQUAL "CACHE")
      set(__add_cache ON)
    elseif(vars_regex)
      set(vars_regex "${vars_regex}|${name}")
    else()
      set(vars_regex "${name}")
    endif()
  endforeach()
  # Keep only the header lines that define one of the requested names.
  if(EXISTS "${FILENAME}")
    file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" )
  else()
    unset(${FILE_VAR})
  endif()
  # Second pass: extract the number for each requested name.
  foreach(name ${ARGN})
    if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE")
      if(${FILE_VAR})
        # ${FILE_VAR} expands to the variable *name*; if() dereferences it.
        if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*")
          string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}")
        else()
          set(${name} "")
        endif()
        # CACHE wins over PARENT_SCOPE when both keywords were passed.
        if(__add_cache)
          set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE)
        elseif(__parnet_scope)
          set(${name} "${${name}}" PARENT_SCOPE)
        endif()
      else()
        # Nothing was read from the header: drop any previously cached value.
        unset(${name} CACHE)
      endif()
    endif()
  endforeach()
endmacro()

################################################################################################
# Reads a single quoted version define from a header file and splits it into
# its components.
# Usage:
#   caffe_parse_header_single_define(<library_name> <file> <define_name>)
#
# Looks for the first line of the form
#   #define <define_name> "<major>.<minor>.<patch>[.<tweak>]"
# and, when one is found, sets in the caller's scope:
#   <library_name>_VERSION_MAJOR, _VERSION_MINOR, _VERSION_PATCH,
#   <library_name>_VERSION_TWEAK  ("" when the define has no fourth component),
#   <library_name>_VERSION_STRING ("major.minor.patch", plus ".tweak" when the
#                                  tweak component is present and non-zero).
# Nothing is set when the file does not exist or contains no such define.
# Extra arguments are ignored.
function(caffe_parse_header_single_define LIBNAME HDR_PATH VARNAME)
  set(${LIBNAME}_H "")
  if(EXISTS "${HDR_PATH}")
    file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1)
  endif()

  if(${LIBNAME}_H)
    # Work on function-local variables first. set(... PARENT_SCOPE) does not
    # change the local copy, so results that are read again below must exist
    # locally before they are exported.
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}")
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR  "${${LIBNAME}_H}")
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}")
    set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}")

    # append a TWEAK version if it exists:
    set(${LIBNAME}_VERSION_TWEAK "")
    if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$")
      set(${LIBNAME}_VERSION_TWEAK "${CMAKE_MATCH_1}")
    endif()
    if(${LIBNAME}_VERSION_TWEAK)
      set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}")
    endif()

    # Export the finished values to the caller in one place.
    foreach(_component MAJOR MINOR PATCH TWEAK STRING)
      set(${LIBNAME}_VERSION_${_component} "${${LIBNAME}_VERSION_${_component}}" PARENT_SCOPE)
    endforeach()
  endif()
endfunction()

########################################################################################################
# Declares a user-selectable option, optionally guarded by a condition.
# Usage:
#   caffe_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
#
# Everything between the doc string and the IF keyword is the initial value
# (a single value, the name of a variable, or a multi-token boolean
# expression). Everything after IF is the availability condition. When the
# condition is false the option is removed from the cache instead of declared.
function(caffe_option variable description value)
  set(__value ${value})
  set(__condition "")
  # __varname names the list that currently collects arguments; it switches
  # from the value list to the condition list once IF is seen.
  set(__varname "__value")
  foreach(arg ${ARGN})
    if(arg STREQUAL "IF" OR arg STREQUAL "if")
      set(__varname "__condition")
    else()
      list(APPEND ${__varname} ${arg})
    endif()
  endforeach()
  unset(__varname)
  # No IF clause: use a condition that is always true.
  if("${__condition}" STREQUAL "")
    set(__condition 2 GREATER 1)
  endif()

  if(${__condition})
    if("${__value}" MATCHES ";")
      # Several tokens: evaluate them as a boolean expression.
      if(${__value})
        option(${variable} "${description}" ON)
      else()
        option(${variable} "${description}" OFF)
      endif()
    elseif(DEFINED ${__value})
      # A single token naming a defined variable: use that variable's truth value.
      if(${__value})
        option(${variable} "${description}" ON)
      else()
        option(${variable} "${description}" OFF)
      endif()
    else()
      # A plain literal: pass it through as the default.
      option(${variable} "${description}" ${__value})
    endif()
  else()
    unset(${variable} CACHE)
  endif()
endfunction()

##############################################################################
# Wraps a library in the linker's as-needed flags where the toolchain uses them.
# Usage:
#   caffe_add_as_needed_flag(<lib> <output_var>)
# <output_var> is set in the caller's scope to <lib> itself for Clang and
# MSVC, and to "-Wl,--no-as-needed <lib> -Wl,--as-needed" otherwise.
function(caffe_add_as_needed_flag lib output_var)
  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" OR MSVC)
    # TODO: Clang seems to not need this flag. Double check.
    # TODO: check what the behavior of MSVC is.
    set(_wrapped ${lib})
  else()
    # Everything else is assumed to behave like gcc and need the flag pair.
    set(_wrapped -Wl,--no-as-needed ${lib} -Wl,--as-needed)
  endif()
  set(${output_var} ${_wrapped} PARENT_SCOPE)
endfunction()

##############################################################################
# Wraps a library in the toolchain's whole-archive flags.
# Usage:
#   caffe_add_whole_archive_flag(<lib> <output_var>)
# <output_var> is set in the caller's scope to the flag sequence for the
# detected compiler.
function(caffe_add_whole_archive_flag lib output_var)
  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    set(_flags -Wl,-force_load,$<TARGET_FILE:${lib}>)
  elseif(MSVC)
    # MSVC takes a single option naming the archive file.
    set(_flags -WHOLEARCHIVE:$<TARGET_FILE:${lib}>)
  else()
    # Everything else is assumed to behave like gcc.
    set(_flags -Wl,--whole-archive ${lib} -Wl,--no-whole-archive)
  endif()
  set(${output_var} ${_flags} PARENT_SCOPE)
endfunction()

##############################################################################
# Wraps a library in whichever linker flags match the build type.
# Usage:
#   caffe_add_linker_flag(<lib> <output_var>)
# Static builds (BUILD_SHARED_LIBS false) get the whole-archive flags, shared
# builds get the as-needed flags. The result is stored in <output_var> in the
# caller's scope.
function(caffe_add_linker_flag lib output_var)
  if (NOT BUILD_SHARED_LIBS)
    caffe_add_whole_archive_flag(${lib} _wrapped)
  else()
    caffe_add_as_needed_flag(${lib} _wrapped)
  endif()
  set(${output_var} ${_wrapped} PARENT_SCOPE)
endfunction()

##############################################################################
# Helper function to automatically generate empty __init__.py files in the
# build tree for every directory level that holds python sources under
# ${PROJECT_SOURCE_DIR}/caffe2, wherever no __init__.py is present yet.
# Takes no arguments; writes only below ${CMAKE_BINARY_DIR}.
function(caffe_autogen_init_py_files)
  file(GLOB_RECURSE all_python_files RELATIVE ${PROJECT_SOURCE_DIR}
       "${PROJECT_SOURCE_DIR}/caffe2/*.py")
  set(python_paths_need_init_py "")
  foreach(python_file ${all_python_files})
    get_filename_component(python_path ${python_file} PATH)
    string(REPLACE "/" ";" path_parts ${python_path})
    # Every ancestor directory of the file needs its own __init__.py, so
    # rebuild the path one component at a time and record each prefix.
    set(rebuilt_path ${CMAKE_BINARY_DIR})
    foreach(path_part ${path_parts})
      set(rebuilt_path "${rebuilt_path}/${path_part}")
      list(APPEND python_paths_need_init_py ${rebuilt_path})
    endforeach()
  endforeach()
  # Since the _pb2.py files are yet to be created, their directories have to
  # be added to the list manually.
  list(APPEND python_paths_need_init_py
       ${CMAKE_BINARY_DIR}/caffe
       ${CMAKE_BINARY_DIR}/caffe/proto
       ${CMAKE_BINARY_DIR}/caffe2/proto)
  # Deduplicate only after the unconditional appends: the list is then always
  # defined, even when the glob matched nothing. Older CMake releases raise an
  # error when REMOVE_DUPLICATES is given an undefined list.
  list(REMOVE_DUPLICATES python_paths_need_init_py)

  foreach(init_dir ${python_paths_need_init_py})
    if(NOT EXISTS ${init_dir}/__init__.py)
      # message(STATUS "Generate " ${init_dir}/__init__.py)
      file(WRITE ${init_dir}/__init__.py "")
    endif()
  endforeach()
endfunction()

##############################################################################
# Creates a Caffe2 binary target from sources given as paths relative to the
# current source directory.
# Usage:
#   caffe2_binary_target(target_name_or_src <src1> [<src2>] [<src3>] ...)
# If only target_name_or_src is specified, the target is built from that one
# source file and the target name is derived from the file name. Otherwise the
# target name is given by the first argument and the remaining arguments are
# the source files to build the target from.
# The target depends on ${Caffe2_MAIN_LIBS_ORDER}, links against
# ${Caffe2_MAIN_LIBS} and ${Caffe2_DEPENDENCY_LIBS}, and is installed to bin.
function(caffe2_binary_target target_name_or_src)
  # Decide by argument count. Expanding ${ARGN} inside if() would evaluate the
  # source file names themselves as a condition: a single source is looked up
  # as an (undefined) variable and several sources are not a valid expression.
  if (ARGC GREATER 1)
    set(__target ${target_name_or_src})
    prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${ARGN}")
  else()
    get_filename_component(__target ${target_name_or_src} NAME_WE)
    prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${target_name_or_src}")
  endif()
  add_executable(${__target} ${__srcs})
  add_dependencies(${__target} ${Caffe2_MAIN_LIBS_ORDER})
  target_link_libraries(${__target} ${Caffe2_MAIN_LIBS} ${Caffe2_DEPENDENCY_LIBS})
  install(TARGETS ${__target} DESTINATION bin)
endfunction()

##############################################################################
# Adds each given path to the SYSTEM include directories.
#
# Anaconda distributions ship many packages, and some of them can clash with
# headers/libraries that have to come from elsewhere. Paths containing
# "/anaconda" are therefore added AFTER the existing include paths, while all
# other paths are added BEFORE them, so Anaconda does not accidentally take
# precedence.
#
# This is only a heuristic without guarantees. Further corner cases can be
# handled here as long as they are generic enough; a complete include path
# cross checker is the last resort if this approach proves insufficient.
#
function(caffe2_include_directories)
  foreach(include_dir IN LISTS ARGN)
    set(_position BEFORE)
    if (${include_dir} MATCHES "/anaconda")
      set(_position AFTER)
    endif()
    include_directories(${_position} SYSTEM ${include_dir})
  endforeach()
endfunction()


================================================
FILE: cmake/legacy/legacymake.cmake
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# This file contains legacy CMake scripts that are going to be removed
# in a future release.

# Add CMake modules.
# Makes the find modules under cmake/legacy/Modules visible to find_package().
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/legacy/Modules)

# Add compiler flags.
# The flags are appended, so anything already present in CMAKE_C_FLAGS /
# CMAKE_CXX_FLAGS is kept.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC -Wno-narrowing")

# Include Caffe2 CMake utils.
include(cmake/legacy/Utils.cmake)

# Find dependencies.
# NOTE(review): HAVE_CUDA and CAFFE2_INCLUDE_DIRS used below are presumably
# set by this include; confirm against Dependencies.cmake.
include(cmake/legacy/Dependencies.cmake)

# Print configuration summary.
include(cmake/legacy/Summary.cmake)
detectron_print_config_summary()

# Collect custom ops sources.
# *.cc files are the CPU sources, *.cu files the GPU sources.
file(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cc)
file(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cu)

# Install custom CPU ops lib.
# Shared library built from the CPU sources only, linked against caffe2.
add_library(
     caffe2_detectron_custom_ops SHARED
     ${CUSTOM_OPS_CPU_SRCS})

target_include_directories(
    caffe2_detectron_custom_ops PRIVATE
    ${CAFFE2_INCLUDE_DIRS})
target_link_libraries(caffe2_detectron_custom_ops caffe2)
install(TARGETS caffe2_detectron_custom_ops DESTINATION lib)

# Install custom GPU ops lib.
# Only built when CUDA is available. The GPU library compiles the CPU sources
# together with the GPU sources and links against caffe2_gpu.
if (${HAVE_CUDA})
  # Additional -I prefix is required for CMake versions before commit (< 3.7):
  # https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594
  list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS})
  CUDA_ADD_LIBRARY(
      caffe2_detectron_custom_ops_gpu SHARED
      ${CUSTOM_OPS_CPU_SRCS}
      ${CUSTOM_OPS_GPU_SRCS})

  target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu)
  install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib)
endif()


================================================
FILE: configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
  USE_GN: True  # Note: use GN on the FPN-specific layers
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform
  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem
  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl  # Note: a GN pre-trained model
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 270000
  STEPS: [0, 210000, 250000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
  USE_GN: True  # Note: use GN on the FPN-specific layers
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform
  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem
  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl  # Note: a GN pre-trained model
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
  USE_GN: True  # Note: use GN on the FPN-specific layers
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform
  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem
  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl  # Note: a GN pre-trained model
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 270000
  STEPS: [0, 210000, 250000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
  USE_GN: True  # Note: use GN on the FPN-specific layers
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform
  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem
  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl  # Note: a GN pre-trained model
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/04_2018_gn_baselines/mask_rcnn_R-50-FPN_1x_gn.yaml
================================================
# WARNING: this config uses **pre-computed** BN-based proposals, and is for quick debugging only.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
  USE_GN: True  # Note: use GN on the FPN-specific layers
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform
  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem
  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl  # Note: a GN pre-trained model
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 270000
  STEPS: [0, 210000, 250000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
  USE_GN: True  # Note: use GN on the FPN-specific layers
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform
  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem
  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  # WEIGHTS: N/A
  FREEZE_AT: 0
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 270000
  STEPS: [0, 210000, 250000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
  USE_GN: True  # Note: use GN on the FPN-specific layers
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform
  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem
  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  # WEIGHTS: N/A
  FREEZE_AT: 0
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
FAST_RCNN:
  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
  ROI_XFORM_METHOD: RoIAlign
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
FAST_RCNN:
  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
  ROI_XFORM_METHOD: RoIAlign
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
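  # 1x schedule for TRAIN.IMS_PER_BATCH: 1 -- half the BASE_LR and twice the
  # iterations of the 1x configs that leave IMS_PER_BATCH unset (0.02, 90000)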
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
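  # 2x schedule for TRAIN.IMS_PER_BATCH: 1 -- half the BASE_LR and twice the
  # iterations of the 2x configs that leave IMS_PER_BATCH unset (0.02, 180000)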
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
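  # 1x schedule for TRAIN.IMS_PER_BATCH: 1 -- half the BASE_LR and twice the
  # iterations of the 1x configs that leave IMS_PER_BATCH unset (0.02, 90000)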
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
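  # 2x schedule for TRAIN.IMS_PER_BATCH: 1 -- half the BASE_LR and twice the
  # iterations of the 2x configs that leave IMS_PER_BATCH unset (0.02, 180000)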
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  FASTER_RCNN: True
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (deconv output, USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  FASTER_RCNN: True
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 130000
  STEPS: [0, 100000, 120000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (deconv output, USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 2
  FASTER_RCNN: True
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (deconv output, USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 2
  FASTER_RCNN: True
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 130000
  STEPS: [0, 100000, 120000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (deconv output, USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  FASTER_RCNN: True
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (deconv output, USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  FASTER_RCNN: True
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 130000
  STEPS: [0, 100000, 120000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (deconv output, USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  FASTER_RCNN: True
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (deconv output, USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  FASTER_RCNN: True
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 130000
  STEPS: [0, 100000, 120000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (deconv output, USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
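  # 1x schedule for TRAIN.IMS_PER_BATCH: 1 -- half the BASE_LR and twice the
  # iterations of the 1x configs that leave IMS_PER_BATCH unset (0.02, 90000)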
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
FAST_RCNN:
  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
  ROI_XFORM_METHOD: RoIAlign
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare
  RESOLUTION: 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default: GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
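  # 2x schedule for TRAIN.IMS_PER_BATCH: 1 -- half the BASE_LR and twice the
  # iterations of the 2x configs that leave IMS_PER_BATCH unset (0.02, 180000)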
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
FAST_RCNN:
  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
  ROI_XFORM_METHOD: RoIAlign
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare
  RESOLUTION: 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default: GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
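  # 1x schedule for TRAIN.IMS_PER_BATCH: 1 -- half the BASE_LR and twice the
  # iterations of the 1x configs that leave IMS_PER_BATCH unset (0.02, 90000)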
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
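  # 2x schedule for TRAIN.IMS_PER_BATCH: 1 -- half the BASE_LR and twice the
  # iterations of the 2x configs that leave IMS_PER_BATCH unset (0.02, 180000)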
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml
================================================
# End-to-end Mask R-CNN baseline (MODEL.FASTER_RCNN and MODEL.MASK_ON enabled)
# with an FPN body and 64x4d grouped bottlenecks, on the 1x schedule.
MODEL:
  TYPE: generalized_rcnn
  # The body builder is the ResNet-101 FPN one; the 64x4d grouping named in
  # this file comes from RESNETS.NUM_GROUPS / RESNETS.WIDTH_PER_GROUP below.
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  # 64 groups x 4 channels per group ("64x4d")
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  # Initial weights file; the path places it under ImageNetPretrained/.
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  # Test scale / max size match the single training scale above.
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml
================================================
# End-to-end Mask R-CNN baseline (MODEL.FASTER_RCNN and MODEL.MASK_ON enabled)
# with an FPN body and 64x4d grouped bottlenecks, on the 2x schedule.
MODEL:
  TYPE: generalized_rcnn
  # The body builder is the ResNet-101 FPN one; the 64x4d grouping named in
  # this file comes from RESNETS.NUM_GROUPS / RESNETS.WIDTH_PER_GROUP below.
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)
  BASE_LR: 0.01
  GAMMA: 0.1
  # MAX_ITER and STEPS are double the values in the matching _1x config.
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  # 64 groups x 4 channels per group ("64x4d")
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  # Initial weights file; the path places it under ImageNetPretrained/.
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  # Test scale / max size match the single training scale above.
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml
================================================
# End-to-end Mask R-CNN (MODEL.FASTER_RCNN and MODEL.MASK_ON enabled) with a
# 152-layer FPN body and 32x8d grouped bottlenecks. Compared with the other
# e2e_mask_rcnn configs in this directory it adds training scale jitter and
# test-time augmentation (TEST.BBOX_VOTE, TEST.BBOX_AUG, TEST.MASK_AUG).
MODEL:
  TYPE: generalized_rcnn
  # The body builder is the ResNet-152 FPN one; the 32x8d grouping named in
  # this file comes from RESNETS.NUM_GROUPS / RESNETS.WIDTH_PER_GROUP below.
  CONV_BODY: FPN.add_fpn_ResNet152_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1)
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 260000
  STEPS: [0, 200000, 240000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  # 32 groups x 8 channels per group ("32x8d")
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  # NOTE(review): "IN5k" in the file name presumably means ImageNet-5k
  # pretraining -- confirm against the model zoo notes.
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (640, 672, 704, 736, 768, 800)  # Scale jitter
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  # Box voting is switched on for this config only.
  BBOX_VOTE:
    ENABLED: True
    VOTE_TH: 0.9
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
  # Box test-time augmentation: horizontal flip plus the extra scales listed
  # (each also flipped via SCALE_H_FLIP); no aspect-ratio augmentation.
  BBOX_AUG:
    ENABLED: True
    SCORE_HEUR: UNION
    COORD_HEUR: UNION
    H_FLIP: True
    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)
    MAX_SIZE: 2000
    SCALE_H_FLIP: True
    SCALE_SIZE_DEP: False
    ASPECT_RATIOS: ()
    ASPECT_RATIO_H_FLIP: False
  # Mask test-time augmentation: same flip / scale set as BBOX_AUG, combined
  # with the SOFT_AVG heuristic.
  MASK_AUG:
    ENABLED: True
    HEUR: SOFT_AVG
    H_FLIP: True
    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)
    MAX_SIZE: 2000
    SCALE_H_FLIP: True
    SCALE_SIZE_DEP: False
    ASPECT_RATIOS: ()
    ASPECT_RATIO_H_FLIP: False
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml
================================================
# Fast R-CNN baseline with a ResNet-101 FPN body, 1x schedule.
# MODEL.FASTER_RCNN is not set here; region proposals are supplied through
# the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_R-101-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  # TRAIN.IMS_PER_BATCH is not overridden in this file.
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml
================================================
# Fast R-CNN baseline with a ResNet-101 FPN body, 2x schedule.
# MODEL.FASTER_RCNN is not set here; region proposals are supplied through
# the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  # MAX_ITER and STEPS are double the values in the matching _1x config.
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order. Note that the
  # proposals come from the rpn_R-101-FPN_1x run even though this is the 2x
  # detector schedule.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  # TRAIN.IMS_PER_BATCH is not overridden in this file.
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml
================================================
# Fast R-CNN baseline with a ResNet-50 conv4 body and a conv5 RoI head (no FPN
# section), 1x schedule. MODEL.FASTER_RCNN is not set here; region proposals
# are supplied through the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
FAST_RCNN:
  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
  ROI_XFORM_METHOD: RoIAlign
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_R-50-C4_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml
================================================
# Fast R-CNN baseline with a ResNet-50 conv4 body and a conv5 RoI head (no FPN
# section), 2x schedule. MODEL.FASTER_RCNN is not set here; region proposals
# are supplied through the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
FAST_RCNN:
  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
  ROI_XFORM_METHOD: RoIAlign
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order. Note that the
  # proposals come from the rpn_R-50-C4_1x run even though this is the 2x
  # detector schedule.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml
================================================
# Fast R-CNN baseline with a ResNet-50 FPN body, 1x schedule.
# MODEL.FASTER_RCNN is not set here; region proposals are supplied through
# the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_R-50-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  # TRAIN.IMS_PER_BATCH is not overridden in this file.
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml
================================================
# Fast R-CNN baseline with a ResNet-50 FPN body, 2x schedule.
# MODEL.FASTER_RCNN is not set here; region proposals are supplied through
# the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  # MAX_ITER and STEPS are double the values in the matching _1x config.
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order. Note that the
  # proposals come from the rpn_R-50-FPN_1x run even though this is the 2x
  # detector schedule.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  # TRAIN.IMS_PER_BATCH is not overridden in this file.
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml
================================================
# Fast R-CNN baseline with an FPN body and 32x8d grouped bottlenecks, 1x
# schedule. MODEL.FASTER_RCNN is not set here; region proposals are supplied
# through the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  # The body builder is the ResNet-101 FPN one; the 32x8d grouping named in
  # this file comes from RESNETS.NUM_GROUPS / RESNETS.WIDTH_PER_GROUP below.
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  # 32 groups x 8 channels per group ("32x8d")
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_X-101-32x8d-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml
================================================
# Fast R-CNN baseline with an FPN body and 32x8d grouped bottlenecks, 2x
# schedule. MODEL.FASTER_RCNN is not set here; region proposals are supplied
# through the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  # The body builder is the ResNet-101 FPN one; the 32x8d grouping named in
  # this file comes from RESNETS.NUM_GROUPS / RESNETS.WIDTH_PER_GROUP below.
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)
  BASE_LR: 0.01
  GAMMA: 0.1
  # MAX_ITER and STEPS are double the values in the matching _1x config.
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  # 32 groups x 8 channels per group ("32x8d")
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order. Note that the
  # proposals come from the rpn_X-101-32x8d-FPN_1x run even though this is
  # the 2x detector schedule.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml
================================================
# Fast R-CNN baseline with an FPN body and 64x4d grouped bottlenecks, 1x
# schedule. MODEL.FASTER_RCNN is not set here; region proposals are supplied
# through the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  # The body builder is the ResNet-101 FPN one; the 64x4d grouping named in
  # this file comes from RESNETS.NUM_GROUPS / RESNETS.WIDTH_PER_GROUP below.
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  # 64 groups x 4 channels per group ("64x4d")
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_X-101-64x4d-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml
================================================
# Fast R-CNN baseline with an FPN body and 64x4d grouped bottlenecks, 2x
# schedule. MODEL.FASTER_RCNN is not set here; region proposals are supplied
# through the PROPOSAL_FILES entries under TRAIN and TEST.
MODEL:
  TYPE: generalized_rcnn
  # The body builder is the ResNet-101 FPN one; the 64x4d grouping named in
  # this file comes from RESNETS.NUM_GROUPS / RESNETS.WIDTH_PER_GROUP below.
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably 80 COCO categories + 1 background class -- TODO confirm
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)
  BASE_LR: 0.01
  GAMMA: 0.1
  # MAX_ITER and STEPS are double the values in the matching _1x config.
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  # 64 groups x 4 channels per group ("64x4d")
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order. Note that the
  # proposals come from the rpn_X-101-64x4d-FPN_1x run even though this is
  # the 2x detector schedule.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml
================================================
# Keypoint R-CNN baseline (MODEL.KEYPOINTS_ON) with a ResNet-101 FPN body,
# 1x schedule. Region proposals are supplied through the PROPOSAL_FILES
# entries under TRAIN and TEST (person-only RPN run, per the URLs).
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably person + background -- TODO confirm
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  # Same box-head module path as the other 12_2017 baseline configs that
  # select add_roi_2mlp_head.
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_person_only_R-101-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  # Multiple training scales (the test section uses the single scale 800).
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml
================================================
# Keypoint R-CNN baseline (MODEL.KEYPOINTS_ON) with a ResNet-101 FPN body,
# s1x schedule. Region proposals are supplied through the PROPOSAL_FILES
# entries under TRAIN and TEST (person-only RPN run, per the URLs).
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably person + background -- TODO confirm
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  # s1x: longer than the matching _1x config (MAX_ITER 130000 vs 90000).
  MAX_ITER: 130000
  STEPS: [0, 100000, 120000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  # Same box-head module path as the other 12_2017 baseline configs that
  # select add_roi_2mlp_head.
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_person_only_R-101-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  # Multiple training scales (the test section uses the single scale 800).
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml
================================================
# Keypoint R-CNN baseline (MODEL.KEYPOINTS_ON) with a ResNet-50 FPN body,
# 1x schedule. Region proposals are supplied through the PROPOSAL_FILES
# entries under TRAIN and TEST (person-only RPN run, per the URLs).
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  # presumably person + background -- TODO confirm
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  # Same box-head module path as the other 12_2017 baseline configs that
  # select add_roi_2mlp_head.
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_person_only_R-50-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  # Multiple training scales (the test section uses the single scale 800).
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml
================================================
# Keypoint R-CNN baseline (MODEL.KEYPOINTS_ON) with a ResNet-50 FPN body,
# s1x schedule. Region proposals are supplied through the PROPOSAL_FILES
# entries under TRAIN and TEST (person-only RPN run, per the URLs).
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  # presumably person + background -- TODO confirm
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  # s1x: longer than the matching _1x config (MAX_ITER 130000 vs 90000).
  MAX_ITER: 130000
  STEPS: [0, 100000, 120000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  # Same box-head module path as the other 12_2017 baseline configs that
  # select add_roi_2mlp_head.
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_person_only_R-50-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  # Multiple training scales (the test section uses the single scale 800).
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml
================================================
# Keypoint R-CNN baseline (MODEL.KEYPOINTS_ON) with an FPN body and 32x8d
# grouped bottlenecks, 1x schedule. Region proposals are supplied through the
# PROPOSAL_FILES entries under TRAIN and TEST (person-only RPN run, per the
# URLs).
MODEL:
  TYPE: generalized_rcnn
  # The body builder is the ResNet-101 FPN one; the 32x8d grouping named in
  # this file comes from RESNETS.NUM_GROUPS / RESNETS.WIDTH_PER_GROUP below.
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  # presumably person + background -- TODO confirm
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  # 32 groups x 8 channels per group ("32x8d")
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  # Same box-head module path as the other 12_2017 baseline configs that
  # select add_roi_2mlp_head.
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  # One proposal file per entry in DATASETS, in the same order; both come
  # from the rpn_person_only_X-101-32x8d-FPN_1x run named in the URL.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  # Multiple training scales (the test section uses the single scale 800).
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  # Proposals for the test dataset, from the same RPN run as the training ones.
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 130000
  STEPS: [0, 100000, 120000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (the extra 2x presumably comes from the deconv layer enabled by USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (the extra 2x presumably comes from the deconv layer enabled by USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 130000
  STEPS: [0, 100000, 120000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (the extra 2x presumably comes from the deconv layer enabled by USE_DECONV_OUTPUT: True)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
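  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1): BASE_LR is halved and
  # MAX_ITER/STEPS are doubled relative to 1x configs that do not override
  # TRAIN.IMS_PER_BATCH (e.g. mask_rcnn_R-50-FPN_1x.yaml)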
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
FAST_RCNN:
  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
  ROI_XFORM_METHOD: RoIAlign
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare
  RESOLUTION: 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
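  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1): BASE_LR is halved and
  # MAX_ITER/STEPS are doubled relative to 2x configs that do not override
  # TRAIN.IMS_PER_BATCH (e.g. mask_rcnn_R-50-FPN_2x.yaml)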
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
FAST_RCNN:
  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
  ROI_XFORM_METHOD: RoIAlign
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare
  RESOLUTION: 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
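  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1): BASE_LR is halved and
  # MAX_ITER/STEPS are doubled relative to 1x configs that do not override
  # TRAIN.IMS_PER_BATCH (e.g. mask_rcnn_R-50-FPN_1x.yaml)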
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
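  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1): BASE_LR is halved and
  # MAX_ITER/STEPS are doubled relative to 2x configs that do not override
  # TRAIN.IMS_PER_BATCH (e.g. mask_rcnn_R-50-FPN_2x.yaml)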
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
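  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1): BASE_LR is halved and
  # MAX_ITER/STEPS are doubled relative to 1x configs that do not override
  # TRAIN.IMS_PER_BATCH (e.g. mask_rcnn_R-50-FPN_1x.yaml)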
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml
================================================
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
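  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1): BASE_LR is halved and
  # MAX_ITER/STEPS are doubled relative to 2x configs that do not override
  # TRAIN.IMS_PER_BATCH (e.g. mask_rcnn_R-50-FPN_2x.yaml)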
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 360000
  STEPS: [0, 240000, 320000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (800,)
  MAX_SIZE: 1333
  IMS_PER_BATCH: 1
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/retinanet_R-101-FPN_1x.yaml
================================================
MODEL:
  TYPE: retinanet
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 7
  RPN_MIN_LEVEL: 3
  COARSEST_STRIDE: 128
  EXTRA_CONV_LEVELS: True
RETINANET:
  RETINANET_ON: True
  NUM_CONVS: 4
  ASPECT_RATIOS: (1.0, 2.0, 0.5)
  SCALES_PER_OCTAVE: 3
  ANCHOR_SCALE: 4
  LOSS_GAMMA: 2.0
  LOSS_ALPHA: 0.25
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  RPN_STRADDLE_THRESH: -1  # default 0
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/retinanet_R-101-FPN_2x.yaml
================================================
MODEL:
  TYPE: retinanet
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 7
  RPN_MIN_LEVEL: 3
  COARSEST_STRIDE: 128
  EXTRA_CONV_LEVELS: True
RETINANET:
  RETINANET_ON: True
  NUM_CONVS: 4
  ASPECT_RATIOS: (1.0, 2.0, 0.5)
  SCALES_PER_OCTAVE: 3
  ANCHOR_SCALE: 4
  LOSS_GAMMA: 2.0
  LOSS_ALPHA: 0.25
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  RPN_STRADDLE_THRESH: -1  # default 0
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/retinanet_R-50-FPN_1x.yaml
================================================
MODEL:
  TYPE: retinanet
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 7
  RPN_MIN_LEVEL: 3
  COARSEST_STRIDE: 128
  EXTRA_CONV_LEVELS: True
RETINANET:
  RETINANET_ON: True
  NUM_CONVS: 4
  ASPECT_RATIOS: (1.0, 2.0, 0.5)
  SCALES_PER_OCTAVE: 3
  ANCHOR_SCALE: 4
  LOSS_GAMMA: 2.0
  LOSS_ALPHA: 0.25
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  RPN_STRADDLE_THRESH: -1  # default 0
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/retinanet_R-50-FPN_2x.yaml
================================================
MODEL:
  TYPE: retinanet
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 7
  RPN_MIN_LEVEL: 3
  COARSEST_STRIDE: 128
  EXTRA_CONV_LEVELS: True
RETINANET:
  RETINANET_ON: True
  NUM_CONVS: 4
  ASPECT_RATIOS: (1.0, 2.0, 0.5)
  SCALES_PER_OCTAVE: 3
  ANCHOR_SCALE: 4
  LOSS_GAMMA: 2.0
  LOSS_ALPHA: 0.25
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  RPN_STRADDLE_THRESH: -1  # default 0
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml
================================================
# RetinaNet baseline with an FPN backbone and ResNeXt-101-32x8d weights.
# The ResNet-101 conv body is reused; the RESNETS block below supplies the
# grouped-convolution settings (NUM_GROUPS: 32, WIDTH_PER_GROUP: 8).
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# FPN levels are limited to 3..7 via RPN_MIN_LEVEL / RPN_MAX_LEVEL.
MODEL:
  TYPE: retinanet
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 7
  RPN_MIN_LEVEL: 3
  COARSEST_STRIDE: 128
  EXTRA_CONV_LEVELS: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
RETINANET:
  RETINANET_ON: True
  NUM_CONVS: 4
  ASPECT_RATIOS: (1.0, 2.0, 0.5)
  SCALES_PER_OCTAVE: 3
  ANCHOR_SCALE: 4
  LOSS_GAMMA: 2.0
  LOSS_ALPHA: 0.25
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  RPN_STRADDLE_THRESH: -1  # default 0
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml
================================================
# RetinaNet baseline with an FPN backbone and ResNeXt-101-32x8d weights.
# The ResNet-101 conv body is reused; the RESNETS block below supplies the
# grouped-convolution settings (NUM_GROUPS: 32, WIDTH_PER_GROUP: 8).
# Schedule: 8 GPUs, 180k iterations, LR steps listed at 120k and 160k
# (GAMMA 0.1), i.e. twice the iteration counts of the 90k-iteration config.
MODEL:
  TYPE: retinanet
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 7
  RPN_MIN_LEVEL: 3
  COARSEST_STRIDE: 128
  EXTRA_CONV_LEVELS: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
RETINANET:
  RETINANET_ON: True
  NUM_CONVS: 4
  ASPECT_RATIOS: (1.0, 2.0, 0.5)
  SCALES_PER_OCTAVE: 3
  ANCHOR_SCALE: 4
  LOSS_GAMMA: 2.0
  LOSS_ALPHA: 0.25
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  RPN_STRADDLE_THRESH: -1  # default 0
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml
================================================
# RetinaNet baseline with an FPN backbone and ResNeXt-101-64x4d weights.
# The ResNet-101 conv body is reused; the RESNETS block below supplies the
# grouped-convolution settings (NUM_GROUPS: 64, WIDTH_PER_GROUP: 4).
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# FPN levels are limited to 3..7 via RPN_MIN_LEVEL / RPN_MAX_LEVEL.
MODEL:
  TYPE: retinanet
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 7
  RPN_MIN_LEVEL: 3
  COARSEST_STRIDE: 128
  EXTRA_CONV_LEVELS: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
RETINANET:
  RETINANET_ON: True
  NUM_CONVS: 4
  ASPECT_RATIOS: (1.0, 2.0, 0.5)
  SCALES_PER_OCTAVE: 3
  ANCHOR_SCALE: 4
  LOSS_GAMMA: 2.0
  LOSS_ALPHA: 0.25
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  RPN_STRADDLE_THRESH: -1  # default 0
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml
================================================
# RetinaNet baseline with an FPN backbone and ResNeXt-101-64x4d weights.
# The ResNet-101 conv body is reused; the RESNETS block below supplies the
# grouped-convolution settings (NUM_GROUPS: 64, WIDTH_PER_GROUP: 4).
# Schedule: 8 GPUs, 180k iterations, LR steps listed at 120k and 160k
# (GAMMA 0.1), i.e. twice the iteration counts of the 90k-iteration config.
MODEL:
  TYPE: retinanet
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 7
  RPN_MIN_LEVEL: 3
  COARSEST_STRIDE: 128
  EXTRA_CONV_LEVELS: True
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
RETINANET:
  RETINANET_ON: True
  NUM_CONVS: 4
  ASPECT_RATIOS: (1.0, 2.0, 0.5)
  SCALES_PER_OCTAVE: 3
  ANCHOR_SCALE: 4
  LOSS_GAMMA: 2.0
  LOSS_ALPHA: 0.25
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  RPN_STRADDLE_THRESH: -1  # default 0
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_R-101-FPN_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) on a ResNet-101 FPN backbone.
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# RPN runs on FPN levels 2..6 with anchors starting at size 32 and three
# aspect ratios.
# TEST.DATASETS lists the two training splits in addition to minival --
# presumably so proposals are produced for all of them; confirm against the
# downstream configs that consume the proposal files.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 6
  RPN_MIN_LEVEL: 2
  RPN_ANCHOR_START_SIZE: 32
  RPN_ASPECT_RATIOS: (0.5, 1, 2)
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')
  SCALE: 800
  MAX_SIZE: 1333
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_R-50-C4_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) on a ResNet-50 conv4 body.
# No FPN block is configured here; anchor sizes come from RPN.SIZES instead.
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# TEST.DATASETS lists the two training splits in addition to minival --
# presumably so proposals are produced for all of them; confirm against the
# downstream configs that consume the proposal files.
MODEL:
  TYPE: rpn
  CONV_BODY: ResNet.add_ResNet50_conv4_body
  NUM_CLASSES: 81
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
RPN:
  SIZES: (32, 64, 128, 256, 512)
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')
  SCALE: 800
  MAX_SIZE: 1333
USE_NCCL: False
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_R-50-FPN_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) on a ResNet-50 FPN backbone.
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# RPN runs on FPN levels 2..6 with anchors starting at size 32 and three
# aspect ratios.
# TEST.DATASETS lists the two training splits in addition to minival --
# presumably so proposals are produced for all of them; confirm against the
# downstream configs that consume the proposal files.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 6
  RPN_MIN_LEVEL: 2
  RPN_ANCHOR_START_SIZE: 32
  RPN_ASPECT_RATIOS: (0.5, 1, 2)
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')
  SCALE: 800
  MAX_SIZE: 1333
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) with an FPN backbone and
# ResNeXt-101-32x8d weights. The ResNet-101 conv body is reused; the RESNETS
# block supplies the grouped-convolution settings (NUM_GROUPS: 32,
# WIDTH_PER_GROUP: 8).
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# TEST.DATASETS lists the two training splits in addition to minival --
# presumably so proposals are produced for all of them; confirm against the
# downstream configs that consume the proposal files.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 6
  RPN_MIN_LEVEL: 2
  RPN_ANCHOR_START_SIZE: 32
  RPN_ASPECT_RATIOS: (0.5, 1, 2)
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')
  SCALE: 800
  MAX_SIZE: 1333
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) with an FPN backbone and
# ResNeXt-101-64x4d weights. The ResNet-101 conv body is reused; the RESNETS
# block supplies the grouped-convolution settings (NUM_GROUPS: 64,
# WIDTH_PER_GROUP: 4).
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# TEST.DATASETS lists the two training splits in addition to minival --
# presumably so proposals are produced for all of them; confirm against the
# downstream configs that consume the proposal files.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 81
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 6
  RPN_MIN_LEVEL: 2
  RPN_ANCHOR_START_SIZE: 32
  RPN_ASPECT_RATIOS: (0.5, 1, 2)
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')
  SCALE: 800
  MAX_SIZE: 1333
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) on a ResNet-101 FPN backbone, trained
# on the keypoints_coco_2014 splits with NUM_CLASSES: 2 (presumably
# background + person, per the "person_only" file name).
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# TEST.DATASETS covers minival, both training splits and the 2015 test set --
# presumably so proposals are produced for each of them; confirm against the
# keypoint configs that consume the proposal files.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 6
  RPN_MIN_LEVEL: 2
  RPN_ANCHOR_START_SIZE: 32
  RPN_ASPECT_RATIOS: (0.5, 1, 2)
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test')
  SCALE: 800
  MAX_SIZE: 1333
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) on a ResNet-50 FPN backbone, trained
# on the keypoints_coco_2014 splits with NUM_CLASSES: 2 (presumably
# background + person, per the "person_only" file name).
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# TEST.DATASETS covers minival, both training splits and the 2015 test set --
# presumably so proposals are produced for each of them; confirm against the
# keypoint configs that consume the proposal files.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 2
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 6
  RPN_MIN_LEVEL: 2
  RPN_ANCHOR_START_SIZE: 32
  RPN_ASPECT_RATIOS: (0.5, 1, 2)
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test')
  SCALE: 800
  MAX_SIZE: 1333
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) with an FPN backbone and
# ResNeXt-101-32x8d weights (RESNETS.NUM_GROUPS: 32, WIDTH_PER_GROUP: 8),
# trained on the keypoints_coco_2014 splits with NUM_CLASSES: 2 (presumably
# background + person, per the "person_only" file name).
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# TEST.DATASETS covers minival, both training splits and the 2015 test set --
# presumably so proposals are produced for each of them; confirm against the
# keypoint configs that consume the proposal files.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 6
  RPN_MIN_LEVEL: 2
  RPN_ANCHOR_START_SIZE: 32
  RPN_ASPECT_RATIOS: (0.5, 1, 2)
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 32
  WIDTH_PER_GROUP: 8
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test')
  SCALE: 800
  MAX_SIZE: 1333
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml
================================================
# RPN-only model (MODEL.RPN_ONLY: True) with an FPN backbone and
# ResNeXt-101-64x4d weights (RESNETS.NUM_GROUPS: 64, WIDTH_PER_GROUP: 4),
# trained on the keypoints_coco_2014 splits with NUM_CLASSES: 2 (presumably
# background + person, per the "person_only" file name).
# Schedule: 8 GPUs, 90k iterations, LR steps listed at 60k and 80k (GAMMA 0.1).
# TEST.DATASETS covers minival, both training splits and the 2015 test set --
# presumably so proposals are produced for each of them; confirm against the
# keypoint configs that consume the proposal files.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body
  NUM_CLASSES: 2
  RPN_ONLY: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_RPN: True
  RPN_MAX_LEVEL: 6
  RPN_MIN_LEVEL: 2
  RPN_ANCHOR_START_SIZE: 32
  RPN_ASPECT_RATIOS: (0.5, 1, 2)
RESNETS:
  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models
  TRANS_FUNC: bottleneck_transformation
  NUM_GROUPS: 64
  WIDTH_PER_GROUP: 4
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
TEST:
  DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test')
  SCALE: 800
  MAX_SIZE: 1333
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 2000
OUTPUT_DIR: .


================================================
FILE: configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml
================================================
# Tutorial config: end-to-end Faster R-CNN (MODEL.FASTER_RCNN: True) on a
# ResNet-50 FPN backbone, set up for a single GPU (NUM_GPUS: 1).
# Trains on coco_2014_train only, at a reduced image scale (500 / max 833).
# The active SOLVER values match the "1 GPU" row of the schedule table below.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 1
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.0025
  GAMMA: 0.1
  MAX_ITER: 60000
  STEPS: [0, 30000, 40000]
  # Equivalent schedules with...
  # 1 GPU:
  #   BASE_LR: 0.0025
  #   MAX_ITER: 60000
  #   STEPS: [0, 30000, 40000]
  # 2 GPUs:
  #   BASE_LR: 0.005
  #   MAX_ITER: 30000
  #   STEPS: [0, 15000, 20000]
  # 4 GPUs:
  #   BASE_LR: 0.01
  #   MAX_ITER: 15000
  #   STEPS: [0, 7500, 10000]
  # 8 GPUs:
  #   BASE_LR: 0.02
  #   MAX_ITER: 7500
  #   STEPS: [0, 3750, 5000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train',)
  SCALES: (500,)
  MAX_SIZE: 833
  BATCH_SIZE_PER_IM: 256
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 500
  MAX_SIZE: 833
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml
================================================
# Tutorial config: end-to-end Faster R-CNN (MODEL.FASTER_RCNN: True) on a
# ResNet-50 FPN backbone, set up for two GPUs (NUM_GPUS: 2).
# Trains on coco_2014_train only, at a reduced image scale (500 / max 833).
# The active SOLVER values match the "2 GPUs" row of the schedule table below.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 2
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.005
  GAMMA: 0.1
  MAX_ITER: 30000
  STEPS: [0, 15000, 20000]
  # Equivalent schedules with...
  # 1 GPU:
  #   BASE_LR: 0.0025
  #   MAX_ITER: 60000
  #   STEPS: [0, 30000, 40000]
  # 2 GPUs:
  #   BASE_LR: 0.005
  #   MAX_ITER: 30000
  #   STEPS: [0, 15000, 20000]
  # 4 GPUs:
  #   BASE_LR: 0.01
  #   MAX_ITER: 15000
  #   STEPS: [0, 7500, 10000]
  # 8 GPUs:
  #   BASE_LR: 0.02
  #   MAX_ITER: 7500
  #   STEPS: [0, 3750, 5000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train',)
  SCALES: (500,)
  MAX_SIZE: 833
  BATCH_SIZE_PER_IM: 256
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 500
  MAX_SIZE: 833
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml
================================================
# Tutorial config: end-to-end Faster R-CNN (MODEL.FASTER_RCNN: True) on a
# ResNet-50 FPN backbone, set up for four GPUs (NUM_GPUS: 4).
# Trains on coco_2014_train only, at a reduced image scale (500 / max 833).
# The active SOLVER values match the "4 GPUs" row of the schedule table below.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 4
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.01
  GAMMA: 0.1
  MAX_ITER: 15000
  STEPS: [0, 7500, 10000]
  # Equivalent schedules with...
  # 1 GPU:
  #   BASE_LR: 0.0025
  #   MAX_ITER: 60000
  #   STEPS: [0, 30000, 40000]
  # 2 GPUs:
  #   BASE_LR: 0.005
  #   MAX_ITER: 30000
  #   STEPS: [0, 15000, 20000]
  # 4 GPUs:
  #   BASE_LR: 0.01
  #   MAX_ITER: 15000
  #   STEPS: [0, 7500, 10000]
  # 8 GPUs:
  #   BASE_LR: 0.02
  #   MAX_ITER: 7500
  #   STEPS: [0, 3750, 5000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train',)
  SCALES: (500,)
  MAX_SIZE: 833
  BATCH_SIZE_PER_IM: 256
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 500
  MAX_SIZE: 833
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml
================================================
# Tutorial config: end-to-end Faster R-CNN (MODEL.FASTER_RCNN: True) on a
# ResNet-50 FPN backbone, set up for eight GPUs (NUM_GPUS: 8).
# Trains on coco_2014_train only, at a reduced image scale (500 / max 833).
# The active SOLVER values match the "8 GPUs" row of the schedule table below.
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 7500
  STEPS: [0, 3750, 5000]
  # Equivalent schedules with...
  # 1 GPU:
  #   BASE_LR: 0.0025
  #   MAX_ITER: 60000
  #   STEPS: [0, 30000, 40000]
  # 2 GPUs:
  #   BASE_LR: 0.005
  #   MAX_ITER: 30000
  #   STEPS: [0, 15000, 20000]
  # 4 GPUs:
  #   BASE_LR: 0.01
  #   MAX_ITER: 15000
  #   STEPS: [0, 7500, 10000]
  # 8 GPUs:
  #   BASE_LR: 0.02
  #   MAX_ITER: 7500
  #   STEPS: [0, 3750, 5000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train',)
  SCALES: (500,)
  MAX_SIZE: 833
  BATCH_SIZE_PER_IM: 256
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 500
  MAX_SIZE: 833
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .


================================================
FILE: configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml
================================================
# Test-time augmentation example for an end-to-end Mask R-CNN (FASTER_RCNN and
# MASK_ON both True) on a ResNet-50 FPN backbone, 180k-iteration schedule.
# TEST.WEIGHTS points at an already-trained e2e_mask_rcnn_R-50-FPN_2x model;
# the BBOX_AUG, MASK_AUG and BBOX_VOTE blocks under TEST enable the
# augmentations.
# NOTE(review): MODEL.TYPE is mask_rcnn while the TEST.WEIGHTS path contains
# "generalized_rcnn" -- confirm the two are meant to be compatible.
MODEL:
  TYPE: mask_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 81
  FASTER_RCNN: True
  MASK_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 180000
  STEPS: [0, 120000, 160000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
MRCNN:
  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs
  RESOLUTION: 28  # (output mask resolution) default 14
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14  # default 7
  ROI_XFORM_SAMPLING_RATIO: 2  # default 0
  DILATION: 1  # default 2
  CONV_INIT: MSRAFill  # default GaussianFill
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')
  SCALES: (800,)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_2014_minival',)
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl

  # -- Test time augmentation example -- #
  BBOX_AUG:
    ENABLED: True
    SCORE_HEUR: UNION  # AVG NOTE: cannot use AVG for e2e model
    COORD_HEUR: UNION  # AVG NOTE: cannot use AVG for e2e model
    H_FLIP: True
    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)
    MAX_SIZE: 2000
    SCALE_H_FLIP: True
    SCALE_SIZE_DEP: False
    AREA_TH_LO: 2500   # 50^2
    AREA_TH_HI: 32400  # 180^2
    ASPECT_RATIOS: ()
    ASPECT_RATIO_H_FLIP: False
  MASK_AUG:
    ENABLED: True
    HEUR: SOFT_AVG
    H_FLIP: True
    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)
    MAX_SIZE: 2000
    SCALE_H_FLIP: True
    SCALE_SIZE_DEP: False
    AREA_TH: 32400  # 180^2
    ASPECT_RATIOS: ()
    ASPECT_RATIO_H_FLIP: False
  BBOX_VOTE:
    ENABLED: True
    VOTE_TH: 0.9
  # -- Test time augmentation example -- #

USE_NCCL: False
OUTPUT_DIR: .


================================================
FILE: configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml
================================================
# Test-time augmentation example for a Keypoint R-CNN (KEYPOINTS_ON: True) on a
# ResNet-50 FPN backbone. Proposals are not computed here: TRAIN and TEST both
# read precomputed rpn_person_only_R-50-FPN_1x proposal files.
# The BBOX_AUG and KPS_AUG blocks under TEST enable the augmentations.
# NOTE(review): TEST.WEIGHTS comes from a keypoint_rcnn_R-50-FPN_s1x run while
# SOLVER below lists a 90k-iteration schedule -- confirm which schedule the
# evaluated model actually used.
# NOTE(review): MODEL.TYPE is keypoint_rcnn while the TEST.WEIGHTS path
# contains "generalized_rcnn" -- confirm the two are meant to be compatible.
MODEL:
  TYPE: keypoint_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 2
  KEYPOINTS_ON: True
NUM_GPUS: 8
SOLVER:
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.02
  GAMMA: 0.1
  MAX_ITER: 90000
  STEPS: [0, 60000, 80000]
FPN:
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True  # accidentally True; disable in the future
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
  ROI_XFORM_SAMPLING_RATIO: 2
KRCNN:
  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX
  NUM_STACKED_CONVS: 8
  NUM_KEYPOINTS: 17
  USE_DECONV_OUTPUT: True
  CONV_INIT: MSRAFill
  CONV_HEAD_DIM: 512
  UP_SCALE: 2
  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 14
  ROI_XFORM_SAMPLING_RATIO: 2
  KEYPOINT_CONFIDENCE: bbox
TRAIN:
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')
  SCALES: (640, 672, 704, 736, 768, 800)
  MAX_SIZE: 1333
  BATCH_SIZE_PER_IM: 512
TEST:
  DATASETS: ('keypoints_coco_2014_minival',)
  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)
  PROPOSAL_LIMIT: 1000
  SCALE: 800
  MAX_SIZE: 1333
  NMS: 0.5
  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl

  # -- Test time augmentation example -- #
  BBOX_AUG:
    ENABLED: True
    SCORE_HEUR: AVG
    COORD_HEUR: AVG
    H_FLIP: True
    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)
    MAX_SIZE: 2000
    SCALE_H_FLIP: True
    SCALE_SIZE_DEP: False
    AREA_TH_LO: 2500  # 50^2
    AREA_TH_HI: 32400  # 180^2
  KPS_AUG:
    ENABLED: True
    HEUR: HM_AVG
    H_FLIP: True
    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)
    MAX_SIZE: 2000
    SCALE_H_FLIP: True
    SCALE_SIZE_DEP: True
    AREA_TH: 22500  # 150^2
    ASPECT_RATIOS: ()
    ASPECT_RATIO_H_FLIP: False
  # -- Test time augmentation example -- #

OUTPUT_DIR: .


================================================
FILE: demo/NOTICE
================================================
The demo images are licensed as United States government work:
https://www.usa.gov/government-works

The image files were obtained on Jan 13, 2018 from the following
URLs.

16004479832_a748d55f21_k.jpg
https://www.flickr.com/photos/archivesnews/16004479832

18124840932_e42b3e377c_k.jpg
https://www.flickr.com/photos/usnavy/18124840932

33887522274_eebd074106_k.jpg
https://www.flickr.com/photos/usaid_pakistan/33887522274

15673749081_767a7fa63a_k.jpg
https://www.flickr.com/photos/usnavy/15673749081

34501842524_3c858b3080_k.jpg
https://www.flickr.com/photos/departmentofenergy/34501842524

24274813513_0cfd2ce6d0_k.jpg
https://www.flickr.com/photos/dhsgov/24274813513

19064748793_bb942deea1_k.jpg
https://www.flickr.com/photos/statephotos/19064748793

33823288584_1d21cf0a26_k.jpg
https://www.flickr.com/photos/cbpphotos/33823288584

17790319373_bd19b24cfc_k.jpg
https://www.flickr.com/photos/secdef/17790319373


================================================
FILE: detectron/__init__.py
================================================


================================================
FILE: detectron/core/__init__.py
================================================


================================================
FILE: detectron/core/config.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Detectron config system.

This file specifies default config options for Detectron. You should not
change values in this file. Instead, you should write a config file (in yaml)
and use merge_cfg_from_file(yaml_file) to load it and override the default
options.

Most tools in the tools directory take a --cfg option to specify an override
file and an optional list of override (key, value) pairs:
 - See tools/{train,test}_net.py for example code that uses merge_cfg_from_file
 - See configs/*/*.yaml for example config files

Detectron supports a lot of different model types, each of which has a lot of
different options. The result is a HUGE set of configuration options.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from ast import literal_eval
from future.utils import iteritems
import copy
import io
import logging
import numpy as np
import os
import os.path as osp
import six

from detectron.utils.collections import AttrDict
from detectron.utils.io import cache_url

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Root node of the config tree; every default below is attached to it
__C = AttrDict()
# Consumers can get config by:
#   from detectron.core.config import cfg
# Note that cfg is an alias of __C (the same object), not a copy
cfg = __C

# Random note: avoid using '.ON' as a config key (e.g., 'FOO.ON') since yaml
# converts a bare ON key to True; prefer 'ENABLED' instead

# ---------------------------------------------------------------------------- #
# Training options
# ---------------------------------------------------------------------------- #
__C.TRAIN = AttrDict()

# Initialize network with weights from this .pkl file
__C.TRAIN.WEIGHTS = ''

# Datasets to train on
# Available dataset list: detectron.datasets.dataset_catalog.datasets()
# If multiple datasets are listed, the model is trained on their union
__C.TRAIN.DATASETS = ()

# Scales to use during training
# Each scale is the pixel size of an image's shortest side
# If multiple scales are listed, then one is selected uniformly at random for
# each training image (i.e., scale jitter data augmentation)
__C.TRAIN.SCALES = (600, )

# Max pixel size of the longest side of a scaled input image
__C.TRAIN.MAX_SIZE = 1000

# Images *per GPU* in the training minibatch
# Total images per minibatch = TRAIN.IMS_PER_BATCH * NUM_GPUS
__C.TRAIN.IMS_PER_BATCH = 2

# RoI minibatch size *per image* (number of regions of interest [ROIs])
# Total number of RoIs per training minibatch =
#   TRAIN.BATCH_SIZE_PER_IM * TRAIN.IMS_PER_BATCH * NUM_GPUS
# E.g., a common configuration is: 512 * 2 * 8 = 8192
# (note that the default set below is 64 RoIs per image, not the 512 used in
# the example above)
__C.TRAIN.BATCH_SIZE_PER_IM = 64

# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25

# Overlap threshold for an RoI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5

# Overlap thresholds for an RoI to be considered background (class = 0 if
# BG_THRESH_LO <= overlap < BG_THRESH_HI)
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0

# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True

# Overlap required between an RoI and a ground-truth box in order for that
# (RoI, gt box) pair to be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5

# Snapshot (model checkpoint) period
# Divide by NUM_GPUS to determine actual period (e.g., 80000/8 => 10000 iters)
# to allow for linear training schedule scaling
__C.TRAIN.SNAPSHOT_ITERS = 80000

# Train using these proposals
# During training, all proposals specified in the file are used (no limit is
# applied)
# Proposal files must be in correspondence with the datasets listed in
# TRAIN.DATASETS
__C.TRAIN.PROPOSAL_FILES = ()

# Make minibatches from images that have similar aspect ratios (i.e. both
# tall and thin or both short and wide)
# This feature is critical for saving memory (and makes training slightly
# faster)
__C.TRAIN.ASPECT_GROUPING = True

# ---------------------------------------------------------------------------- #
# RPN training options
# These options are stored under TRAIN (there is no separate node for them);
# several of the later entries in this section (e.g., AUTO_RESUME, FREEZE_AT)
# are general training options rather than RPN-specific ones
# ---------------------------------------------------------------------------- #

# Run GenerateProposals on GPU if set to True
__C.TRAIN.GENERATE_PROPOSALS_ON_GPU = False

# Minimum overlap required between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a positive example (IOU >= thresh ==> positive RPN
# example)
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7

# Maximum overlap allowed between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a negative example (IOU < thresh ==> negative RPN
# example)
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3

# Target fraction of foreground (positive) examples per RPN minibatch
__C.TRAIN.RPN_FG_FRACTION = 0.5

# Total number of RPN examples per image
__C.TRAIN.RPN_BATCH_SIZE_PER_IM = 256

# NMS threshold used on RPN proposals (used during end-to-end training with RPN)
__C.TRAIN.RPN_NMS_THRESH = 0.7

# Number of top scoring RPN proposals to keep before applying NMS
# When FPN is used, this is *per FPN level* (not total)
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000

# Number of top scoring RPN proposals to keep after applying NMS
# This is the total number of RPN proposals produced (for both FPN and non-FPN
# cases)
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000

# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0

# Proposal height and width both need to be greater than RPN_MIN_SIZE
# (at orig image scale; not scale used during training or inference)
__C.TRAIN.RPN_MIN_SIZE = 0

# Filter proposals that are inside of crowd regions by CROWD_FILTER_THRESH
# "Inside" is measured as: proposal-with-crowd intersection area divided by
# proposal area
__C.TRAIN.CROWD_FILTER_THRESH = 0.7

# Ignore ground-truth objects with area < this threshold
__C.TRAIN.GT_MIN_AREA = -1

# Freeze the backbone architecture during training if set to True
__C.TRAIN.FREEZE_CONV_BODY = False

# If True, training will resume from the latest snapshot (model checkpoint)
# found in the output directory
__C.TRAIN.AUTO_RESUME = True

# If True, training will copy TRAIN.WEIGHTS and treat it as a candidate
# checkpoint
__C.TRAIN.COPY_WEIGHTS = False

# Add StopGrad at a specified stage so the bottom layers are frozen
__C.TRAIN.FREEZE_AT = 2


# ---------------------------------------------------------------------------- #
# Data loader options (see detectron/roi_data/loader.py for more info)
# ---------------------------------------------------------------------------- #
__C.DATA_LOADER = AttrDict()

# Number of Python threads to use for the data loader (warning: using too many
# threads can cause GIL-based interference with Python Ops leading to *slower*
# training; 4 seems to be the sweet spot in our experience)
__C.DATA_LOADER.NUM_THREADS = 4

# Size of the shared minibatch queue
# NOTE(review): the unit (presumably number of minibatches) is not visible
# here -- confirm in detectron/roi_data/loader.py
__C.DATA_LOADER.MINIBATCH_QUEUE_SIZE = 64

# Capacity of the per-GPU blobs queue
__C.DATA_LOADER.BLOBS_QUEUE_CAPACITY = 8


# ---------------------------------------------------------------------------- #
# Inference ('test') options
# ---------------------------------------------------------------------------- #
__C.TEST = AttrDict()

# Initialize network with weights from this .pkl file
__C.TEST.WEIGHTS = ''

# Datasets to test on
# Available dataset list: detectron.datasets.dataset_catalog.datasets()
# If multiple datasets are listed, testing is performed on each one sequentially
__C.TEST.DATASETS = ()

# Scale to use during testing (a single value, unlike the TRAIN.SCALES tuple)
__C.TEST.SCALE = 600

# Max pixel size of the longest side of a scaled input image
__C.TEST.MAX_SIZE = 1000

# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3

# Apply Fast R-CNN style bounding-box regression if True
__C.TEST.BBOX_REG = True

# Test using these proposal files (must correspond with TEST.DATASETS)
__C.TEST.PROPOSAL_FILES = ()

# Run GenerateProposals on GPU if set to True
__C.TEST.GENERATE_PROPOSALS_ON_GPU = False

# Limit on the number of proposals per image used during inference
__C.TEST.PROPOSAL_LIMIT = 2000

# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7

# Number of top scoring RPN proposals to keep before applying NMS
# When FPN is used, this is *per FPN level* (not total)
__C.TEST.RPN_PRE_NMS_TOP_N = 12000

# Number of top scoring RPN proposals to keep after applying NMS
# This is the total number of RPN proposals produced (for both FPN and non-FPN
# cases)
__C.TEST.RPN_POST_NMS_TOP_N = 2000

# Proposal height and width both need to be greater than RPN_MIN_SIZE
# (at orig image scale; not scale used during training or inference)
__C.TEST.RPN_MIN_SIZE = 0

# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
__C.TEST.DETECTIONS_PER_IM = 100

# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
# balance obtaining high recall with not having too many low precision
# detections that will slow down inference post processing steps (like NMS)
__C.TEST.SCORE_THRESH = 0.05

# Save detection results files if True
# If False, results files are cleaned up (they can be large) after local
# evaluation
__C.TEST.COMPETITION_MODE = True

# Evaluate detections with the COCO json dataset eval code even if it's not the
# evaluation code for the dataset (e.g. evaluate PASCAL VOC results using the
# COCO API to get COCO style AP on PASCAL VOC)
__C.TEST.FORCE_JSON_DATASET_EVAL = False

# [Inferred value; do not set directly in a config]
# Indicates if precomputed proposals are used at test time
# Not set for 1-stage models and 2-stage models with RPN subnetwork enabled
__C.TEST.PRECOMPUTED_PROPOSALS = True

# Evaluate proposals in class-specific Average Recall (AR).
# This means that AR is first computed within each category and then averaged
# over the categories. It is not biased towards the AR of frequent categories
# compared with class-agnostic AR.
__C.TEST.CLASS_SPECIFIC_AR = False

# ---------------------------------------------------------------------------- #
# Test-time augmentations for bounding box detection
# See configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml for an example
# ---------------------------------------------------------------------------- #
__C.TEST.BBOX_AUG = AttrDict()

# Enable test-time augmentation for bounding box detection if True
__C.TEST.BBOX_AUG.ENABLED = False

# Heuristic used to combine predicted box scores
#   Valid options: ('ID', 'AVG', 'UNION')
__C.TEST.BBOX_AUG.SCORE_HEUR = 'UNION'

# Heuristic used to combine predicted box coordinates
#   Valid options: ('ID', 'AVG', 'UNION')
__C.TEST.BBOX_AUG.COORD_HEUR = 'UNION'

# Horizontal flip at the original scale (id transform)
__C.TEST.BBOX_AUG.H_FLIP = False

# Each scale is the pixel size of an image's shortest side
__C.TEST.BBOX_AUG.SCALES = ()

# Max pixel size of the longer side
__C.TEST.BBOX_AUG.MAX_SIZE = 4000

# Horizontal flip at each scale
__C.TEST.BBOX_AUG.SCALE_H_FLIP = False

# Apply scaling based on object size
# NOTE(review): AREA_TH_LO / AREA_TH_HI look like lower / upper object-area
# thresholds (written as squared side lengths) used by the size-dependent
# scaling -- confirm against the test-time augmentation code
__C.TEST.BBOX_AUG.SCALE_SIZE_DEP = False
__C.TEST.BBOX_AUG.AREA_TH_LO = 50**2
__C.TEST.BBOX_AUG.AREA_TH_HI = 180**2

# Each aspect ratio is relative to image width
__C.TEST.BBOX_AUG.ASPECT_RATIOS = ()

# Horizontal flip at each aspect ratio
__C.TEST.BBOX_AUG.ASPECT_RATIO_H_FLIP = False

# ---------------------------------------------------------------------------- #
# Test-time augmentations for mask detection
# See configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml for an example
# ---------------------------------------------------------------------------- #
__C.TEST.MASK_AUG = AttrDict()

# Enable test-time augmentation for instance mask detection if True
__C.TEST.MASK_AUG.ENABLED = False

# Heuristic used to combine mask predictions
# SOFT prefix indicates that the computation is performed on soft masks
#   Valid options: ('SOFT_AVG', 'SOFT_MAX', 'LOGIT_AVG')
__C.TEST.MASK_AUG.HEUR = 'SOFT_AVG'

# Horizontal flip at the original scale (id transform)
__C.TEST.MASK_AUG.H_FLIP = False

# Each scale is the pixel size of an image's shortest side
__C.TEST.MASK_AUG.SCALES = ()

# Max pixel size of the longer side
__C.TEST.MASK_AUG.MAX_SIZE = 4000

# Horizontal flip at each scale
__C.TEST.MASK_AUG.SCALE_H_FLIP = False

# Apply scaling based on object size
# NOTE(review): AREA_TH looks like an object-area threshold (written as a
# squared side length) used by the size-dependent scaling -- confirm against
# the test-time augmentation code
__C.TEST.MASK_AUG.SCALE_SIZE_DEP = False
__C.TEST.MASK_AUG.AREA_TH = 180**2

# Each aspect ratio is relative to image width
__C.TEST.MASK_AUG.ASPECT_RATIOS = ()

# Horizontal flip at each aspect ratio
__C.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP = False

# ---------------------------------------------------------------------------- #
# Test-time augmentations for keypoint detection
# See configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml for an example
# ---------------------------------------------------------------------------- #
__C.TEST.KPS_AUG = AttrDict()

# Enable test-time augmentation for keypoint detection if True
__C.TEST.KPS_AUG.ENABLED = False

# Heuristic used to combine keypoint predictions
#   Valid options: ('HM_AVG', 'HM_MAX')
__C.TEST.KPS_AUG.HEUR = 'HM_AVG'

# Horizontal flip at the original scale (id transform)
__C.TEST.KPS_AUG.H_FLIP = False

# Each scale is the pixel size of an image's shortest side
__C.TEST.KPS_AUG.SCALES = ()

# Max pixel size of the longer side
__C.TEST.KPS_AUG.MAX_SIZE = 4000

# Horizontal flip at each scale
__C.TEST.KPS_AUG.SCALE_H_FLIP = False

# Apply scaling based on object size
# NOTE(review): AREA_TH looks like an object-area threshold (written as a
# squared side length) used by the size-dependent scaling -- confirm against
# the test-time augmentation code
__C.TEST.KPS_AUG.SCALE_SIZE_DEP = False
__C.TEST.KPS_AUG.AREA_TH = 180**2

# Each aspect ratio is relative to image width
__C.TEST.KPS_AUG.ASPECT_RATIOS = ()

# Horizontal flip at each aspect ratio
__C.TEST.KPS_AUG.ASPECT_RATIO_H_FLIP = False

# ---------------------------------------------------------------------------- #
# Soft NMS
# ---------------------------------------------------------------------------- #
__C.TEST.SOFT_NMS = AttrDict()

# Use soft NMS instead of standard NMS if set to True
__C.TEST.SOFT_NMS.ENABLED = False
# See soft NMS paper for definition of these options
# NOTE(review): the set of valid METHOD strings is not listed here -- confirm
# against the soft NMS implementation
__C.TEST.SOFT_NMS.METHOD = 'linear'
__C.TEST.SOFT_NMS.SIGMA = 0.5
# For the soft NMS overlap threshold, we simply use TEST.NMS (there is no
# separate SOFT_NMS threshold option)

# ---------------------------------------------------------------------------- #
# Bounding box voting (from the Multi-Region CNN paper)
# ---------------------------------------------------------------------------- #
__C.TEST.BBOX_VOTE = AttrDict()

# Use box voting if set to True
__C.TEST.BBOX_VOTE.ENABLED = False

# We use the TEST.NMS threshold for the NMS step. The VOTE_TH overlap threshold
# is used to select voting boxes (IoU >= VOTE_TH) for each box that survives NMS
__C.TEST.BBOX_VOTE.VOTE_TH = 0.8

# The method used to combine scores when doing bounding box voting
# Valid options include ('ID', 'AVG', 'IOU_AVG', 'GENERALIZED_AVG', 'QUASI_SUM')
__C.TEST.BBOX_VOTE.SCORING_METHOD = 'ID'

# Hyperparameter used by the scoring method (it has different meanings for
# different methods)
__C.TEST.BBOX_VOTE.SCORING_METHOD_BETA = 1.0


# ---------------------------------------------------------------------------- #
# Model options
# ---------------------------------------------------------------------------- #
__C.MODEL = AttrDict()

# The type of model to use
# The string must match a function in the modeling.model_builder module
# (e.g., 'generalized_rcnn', 'mask_rcnn', ...)
__C.MODEL.TYPE = ''

# The backbone conv body to use
# The string must match a function that is imported in modeling.model_builder
# (e.g., 'FPN.add_fpn_ResNet101_conv5_body' to specify a ResNet-101-FPN
# backbone)
__C.MODEL.CONV_BODY = ''

# Number of classes in the dataset; must be set (the default of -1 is only a
# placeholder)
# E.g., 81 for COCO (80 foreground + 1 background)
__C.MODEL.NUM_CLASSES = -1

# Use a class agnostic bounding box regressor instead of the default per-class
# regressor
__C.MODEL.CLS_AGNOSTIC_BBOX_REG = False

# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
__C.MODEL.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)

# The meaning of FASTER_RCNN depends on the context (training vs. inference):
# 1) During training, FASTER_RCNN = True means that end-to-end training will be
#    used to jointly train the RPN subnetwork and the Fast R-CNN subnetwork
#    (Faster R-CNN = RPN + Fast R-CNN).
# 2) During inference, FASTER_RCNN = True means that the model's RPN subnetwork
#    will be used to generate proposals rather than relying on precomputed
#    proposals. Note that FASTER_RCNN = True can be used at inference time even
#    if the Faster R-CNN model was trained with stagewise training (which
#    consists of alternating between RPN and Fast R-CNN training in a way that
#    finally leads to a single network).
__C.MODEL.FASTER_RCNN = False

# Indicates the model makes instance mask predictions (as in Mask R-CNN)
__C.MODEL.MASK_ON = False

# Indicates the model makes keypoint predictions (as in Mask R-CNN for
# keypoints)
__C.MODEL.KEYPOINTS_ON = False

# Indicates the model's computation terminates with the production of RPN
# proposals (i.e., it outputs proposals ONLY, no actual object detections)
__C.MODEL.RPN_ONLY = False

# Caffe2 net execution type
# Use 'prof_dag' to get profiling statistics
__C.MODEL.EXECUTION_TYPE = 'dag'


# ---------------------------------------------------------------------------- #
# RetinaNet options
# ---------------------------------------------------------------------------- #
__C.RETINANET = AttrDict()

# RetinaNet is used (instead of Fast/er/Mask R-CNN/R-FCN/RPN) if True
__C.RETINANET.RETINANET_ON = False

# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)

# Anchor scales per octave
__C.RETINANET.SCALES_PER_OCTAVE = 3

# At each FPN level, we generate anchors based on their scale, aspect_ratio,
# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE
__C.RETINANET.ANCHOR_SCALE = 4

# Number of convolutions to use in the cls and bbox towers
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4

# Weight for bbox_regression loss
__C.RETINANET.BBOX_REG_WEIGHT = 1.0

# Smooth L1 loss beta for bbox regression
__C.RETINANET.BBOX_REG_BETA = 0.11

# During inference, the number of locations to select based on cls score before
# NMS is performed, per FPN level
__C.RETINANET.PRE_NMS_TOP_N = 1000

# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
__C.RETINANET.POSITIVE_OVERLAP = 0.5

# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4

# Focal loss parameter: alpha
__C.RETINANET.LOSS_ALPHA = 0.25

# Focal loss parameter: gamma
__C.RETINANET.LOSS_GAMMA = 2.0

# Prior prob for the positives at the beginning of training. This is used to set
# the bias init for the logits layer
__C.RETINANET.PRIOR_PROB = 0.01

# Whether classification and bbox branch tower should be shared or not
__C.RETINANET.SHARE_CLS_BBOX_TOWER = False

# Use class specific bounding box regression instead of the default class
# agnostic regression
__C.RETINANET.CLASS_SPECIFIC_BBOX = False

# Whether softmax should be used in classification branch training
__C.RETINANET.SOFTMAX = False

# Inference cls score threshold; anchors with score > INFERENCE_TH are
# considered for inference
__C.RETINANET.INFERENCE_TH = 0.05


# ---------------------------------------------------------------------------- #
# Solver options
# Note: all solver options are used exactly as specified; the implication is
# that if you switch from training on 1 GPU to N GPUs, you MUST adjust the
# solver configuration accordingly. We suggest using gradual warmup and the
# linear learning rate scaling rule as described in
# "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" Goyal et al.
# https://arxiv.org/abs/1706.02677
# ---------------------------------------------------------------------------- #
__C.SOLVER = AttrDict()

# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001

# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'step'

# Some LR Policies (by example):
# 'step'
#   lr = SOLVER.BASE_LR * SOLVER.GAMMA ** (cur_iter // SOLVER.STEP_SIZE)
# 'steps_with_decay'
#   SOLVER.STEPS = [0, 60000, 80000]
#   SOLVER.GAMMA = 0.1
#   lr = SOLVER.BASE_LR * SOLVER.GAMMA ** current_step
#   iters [0, 59999] are in current_step = 0, iters [60000, 79999] are in
#   current_step = 1, and so on
# 'steps_with_lrs'
#   SOLVER.STEPS = [0, 60000, 80000]
#   SOLVER.LRS = [0.02, 0.002, 0.0002]
#   lr = LRS[current_step]
# 'cosine_decay'
#   lr = SOLVER.BASE_LR * (cos(PI * cur_iter / SOLVER.MAX_ITER) * 0.5 + 0.5)
# 'exp_decay'
#   lr smoothly decays from SOLVER.BASE_LR to SOLVER.GAMMA * SOLVER.BASE_LR
#   lr = SOLVER.BASE_LR * exp(np.log(SOLVER.GAMMA) * cur_iter / SOLVER.MAX_ITER)

# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
# For 'exp_decay', SOLVER.GAMMA is the ratio between the final and initial LR.
__C.SOLVER.GAMMA = 0.1

# Uniform step size for the 'step' policy
__C.SOLVER.STEP_SIZE = 30000

# Non-uniform step iterations for 'steps_with_decay' or 'steps_with_lrs'
# policies
__C.SOLVER.STEPS = []

# Learning rates to use with 'steps_with_lrs' policy
__C.SOLVER.LRS = []

# Maximum number of SGD iterations
__C.SOLVER.MAX_ITER = 40000

# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9

# L2 regularization hyperparameter
__C.SOLVER.WEIGHT_DECAY = 0.0005
# L2 regularization hyperparameter for GroupNorm's parameters
__C.SOLVER.WEIGHT_DECAY_GN = 0.0

# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500

# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0

# WARM_UP_METHOD can be either 'constant' or 'linear' (i.e., gradual)
__C.SOLVER.WARM_UP_METHOD = 'linear'

# Scale the momentum update history by new_lr / old_lr when updating the
# learning rate (this is correct given MomentumSGDUpdateOp)
__C.SOLVER.SCALE_MOMENTUM = True
# Only apply the correction if the relative LR change exceeds this threshold
# (prevents every change in linear warm up from scaling the momentum by a tiny
# amount; momentum scaling is only important if the LR change is large)
__C.SOLVER.SCALE_MOMENTUM_THRESHOLD = 1.1

# Suppress logging of changes to LR unless the relative change exceeds this
# threshold (prevents linear warm up from spamming the training log)
__C.SOLVER.LOG_LR_CHANGE_THRESHOLD = 1.1


# ---------------------------------------------------------------------------- #
# Fast R-CNN options
# ---------------------------------------------------------------------------- #
__C.FAST_RCNN = AttrDict()

# The type of RoI head to use for bounding box classification and regression
# The string must match a function that is imported in modeling.model_builder
# (e.g., 'head_builder.add_roi_2mlp_head' to specify a two hidden layer MLP)
__C.FAST_RCNN.ROI_BOX_HEAD = ''

# Hidden layer dimension when using an MLP for the RoI box head
__C.FAST_RCNN.MLP_HEAD_DIM = 1024

# Hidden Conv layer dimension when using Convs for the RoI box head
__C.FAST_RCNN.CONV_HEAD_DIM = 256
# Number of stacked Conv layers in the RoI box head
__C.FAST_RCNN.NUM_STACKED_CONVS = 4

# RoI transformation function (e.g., RoIPool or RoIAlign)
# (RoIPoolF is the same as RoIPool; ignore the trailing 'F')
__C.FAST_RCNN.ROI_XFORM_METHOD = 'RoIPoolF'

# Number of grid sampling points in RoIAlign (usually use 2)
# Only applies to RoIAlign
__C.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO = 0

# RoI transform output resolution
# Note: some models may have constraints on what they can use, e.g. they use
# pretrained FC layers like in VGG16, and will ignore this option
__C.FAST_RCNN.ROI_XFORM_RESOLUTION = 14


# ---------------------------------------------------------------------------- #
# RPN options
# ---------------------------------------------------------------------------- #
__C.RPN = AttrDict()

# [Inferred value; do not set directly in a config]
# Indicates that the model contains an RPN subnetwork
__C.RPN.RPN_ON = False

# RPN anchor sizes given in absolute pixels w.r.t. the scaled network input
# Note: these options are *not* used by FPN RPN; see FPN.RPN* options
__C.RPN.SIZES = (64, 128, 256, 512)

# Stride of the feature map that RPN is attached to
__C.RPN.STRIDE = 16

# RPN anchor aspect ratios
__C.RPN.ASPECT_RATIOS = (0.5, 1, 2)


# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
__C.FPN = AttrDict()

# FPN is enabled if True
__C.FPN.FPN_ON = False

# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256

# Initialize the lateral connections to output zero if True
__C.FPN.ZERO_INIT_LATERAL = False

# Stride of the coarsest FPN level
# This is needed so the input can be padded properly
__C.FPN.COARSEST_STRIDE = 32

#
# FPN may be used for just RPN, just object detection, or both
#

# Use FPN for RoI transform for object detection if True
__C.FPN.MULTILEVEL_ROIS = False
# Hyperparameters for the RoI-to-FPN level mapping heuristic
__C.FPN.ROI_CANONICAL_SCALE = 224  # s0
__C.FPN.ROI_CANONICAL_LEVEL = 4  # k0: where s0 maps to
# Coarsest level of the FPN pyramid (used for RoIs)
__C.FPN.ROI_MAX_LEVEL = 5
# Finest level of the FPN pyramid (used for RoIs)
__C.FPN.ROI_MIN_LEVEL = 2

# Use FPN for RPN if True
__C.FPN.MULTILEVEL_RPN = False
# Coarsest level of the FPN pyramid (used for RPN)
__C.FPN.RPN_MAX_LEVEL = 6
# Finest level of the FPN pyramid (used for RPN)
__C.FPN.RPN_MIN_LEVEL = 2
# FPN RPN anchor aspect ratios
__C.FPN.RPN_ASPECT_RATIOS = (0.5, 1, 2)
# RPN anchors start at this size on RPN_MIN_LEVEL
# The anchor size doubles at each level after that
# With a default of 32 and levels 2 to 6, we get anchor sizes of 32 to 512
# (i.e., 32, 64, 128, 256, 512)
__C.FPN.RPN_ANCHOR_START_SIZE = 32
# Use extra FPN levels, as done in the RetinaNet paper
__C.FPN.EXTRA_CONV_LEVELS = False
# Use GroupNorm in the FPN-specific layers (lateral, etc.)
__C.FPN.USE_GN = False


# ---------------------------------------------------------------------------- #
# Mask R-CNN options ("MRCNN" means Mask R-CNN)
# ---------------------------------------------------------------------------- #
__C.MRCNN = AttrDict()

# The type of RoI head to use for instance mask prediction
# The string must match a function that is imported in modeling.model_builder
# (e.g., 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs')
__C.MRCNN.ROI_MASK_HEAD = ''

# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 14

# RoI transformation function (e.g., RoIPool or RoIAlign) and associated
# options
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'

# RoI transform output resolution
__C.MRCNN.ROI_XFORM_RESOLUTION = 7

# Number of grid sampling points in RoIAlign (usually use 2)
# Only applies to RoIAlign
__C.MRCNN.ROI_XFORM_SAMPLING_RATIO = 0

# Number of channels in the mask head
__C.MRCNN.DIM_REDUCED = 256

# Use dilated convolution in the mask head
__C.MRCNN.DILATION = 2

# Upsample the predicted masks by this factor
__C.MRCNN.UPSAMPLE_RATIO = 1

# Use a fully-connected layer to predict the final masks instead of a conv layer
__C.MRCNN.USE_FC_OUTPUT = False

# Weight initialization method for the mask head and mask output layers
__C.MRCNN.CONV_INIT = 'GaussianFill'

# Use class specific mask predictions if True (otherwise use class agnostic mask
# predictions)
__C.MRCNN.CLS_SPECIFIC_MASK = True

# Multi-task loss weight for masks
__C.MRCNN.WEIGHT_LOSS_MASK = 1.0

# Binarization threshold for converting soft masks to hard masks
__C.MRCNN.THRESH_BINARIZE = 0.5


# ---------------------------------------------------------------------------- #
# Keypoint Mask R-CNN options ("KRCNN" = Mask R-CNN with Keypoint support)
# ---------------------------------------------------------------------------- #
__C.KRCNN = AttrDict()

# The type of RoI head to use for instance keypoint prediction
# The string must match a function that is imported in modeling.model_builder
# (e.g., 'keypoint_rcnn_heads.add_roi_pose_head_v1convX')
__C.KRCNN.ROI_KEYPOINTS_HEAD = ''

# Output heatmap size (this is also the size that the loss is computed on),
# e.g., 56x56
__C.KRCNN.HEATMAP_SIZE = -1

# Use bilinear interpolation to upsample the final heatmap by this factor
__C.KRCNN.UP_SCALE = -1

# Apply a ConvTranspose layer to the hidden representation computed by the
# keypoint head prior to predicting the per-keypoint heatmaps
__C.KRCNN.USE_DECONV = False
# Channel dimension of the hidden representation produced by the ConvTranspose
__C.KRCNN.DECONV_DIM = 256

# Use a ConvTranspose layer to predict the per-keypoint heatmaps
__C.KRCNN.USE_DECONV_OUTPUT = False

# Use dilation in the keypoint head
__C.KRCNN.DILATION = 1

# Size of the kernels to use in all ConvTranspose operations
__C.KRCNN.DECONV_KERNEL = 4

# Number of keypoints in the dataset (e.g., 17 for COCO)
__C.KRCNN.NUM_KEYPOINTS = -1

# Number of stacked Conv layers in keypoint head
__C.KRCNN.NUM_STACKED_CONVS = 8

# Dimension of the hidden representation output by the keypoint head
__C.KRCNN.CONV_HEAD_DIM = 256

# Conv kernel size used in the keypoint head
__C.KRCNN.CONV_HEAD_KERNEL = 3
# Conv kernel weight filling function
__C.KRCNN.CONV_INIT = 'GaussianFill'

# Use NMS based on OKS if True
__C.KRCNN.NMS_OKS = False

# Source of keypoint confidence
#   Valid options: ('bbox', 'logit', 'prob')
__C.KRCNN.KEYPOINT_CONFIDENCE = 'bbox'

# Standard ROI XFORM options (see FAST_RCNN or MRCNN options)
__C.KRCNN.ROI_XFORM_METHOD = 'RoIAlign'
__C.KRCNN.ROI_XFORM_RESOLUTION = 7
__C.KRCNN.ROI_XFORM_SAMPLING_RATIO = 0

# Minimum number of labeled keypoints that must exist in a minibatch (otherwise
# the minibatch is discarded)
__C.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH = 20

# When inferring the keypoint locations from the heatmap, don't scale the
# heatmap below this minimum size
__C.KRCNN.INFERENCE_MIN_SIZE = 0

# Multi-task loss weight to use for keypoints
# Recommended values:
#   - use 1.0 if KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is True
#   - use 4.0 if KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False
__C.KRCNN.LOSS_WEIGHT = 1.0

# Normalize by the total number of visible keypoints in the minibatch if True.
# Otherwise, normalize by the total number of keypoints that could ever exist
# in the minibatch. See comments in modeling.model_builder.add_keypoint_losses
# for detailed discussion.
__C.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS = True


# ---------------------------------------------------------------------------- #
# R-FCN options
# ---------------------------------------------------------------------------- #
__C.RFCN = AttrDict()

# Position-sensitive RoI pooling output grid size (a single value that is used
# for both height and width)
__C.RFCN.PS_GRID_SIZE = 3


# ---------------------------------------------------------------------------- #
# ResNets options ("ResNets" = ResNet and ResNeXt)
# ---------------------------------------------------------------------------- #
__C.RESNETS = AttrDict()

# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNETS.NUM_GROUPS = 1

# Baseline width of each group
__C.RESNETS.WIDTH_PER_GROUP = 64

# Place the stride 2 conv on the 1x1 filter
# Use True only for the original MSRA ResNet; use False for C2 and Torch models
__C.RESNETS.STRIDE_1X1 = True

# NOTE(review): the three *_FUNC strings below presumably name functions in
# the ResNet modeling code -- confirm where they are resolved
# Residual transformation function
__C.RESNETS.TRANS_FUNC = 'bottleneck_transformation'
# ResNet's stem function (conv1 and pool1)
__C.RESNETS.STEM_FUNC = 'basic_bn_stem'
# ResNet's shortcut function
__C.RESNETS.SHORTCUT_FUNC = 'basic_bn_shortcut'

# Dilation to apply in stage "res5"
__C.RESNETS.RES5_DILATION = 1


# ---------------------------------------------------------------------------- #
# GroupNorm options
# DIM_PER_GP and NUM_GROUPS are alternatives: set the one that is not in use
# to -1. With the defaults below, NUM_GROUPS (32) is the one in use.
# ---------------------------------------------------------------------------- #
__C.GROUP_NORM = AttrDict()
# Number of dimensions per group in GroupNorm (-1 if using NUM_GROUPS)
__C.GROUP_NORM.DIM_PER_GP = -1
# Number of groups in GroupNorm (-1 if using DIM_PER_GP)
__C.GROUP_NORM.NUM_GROUPS = 32
# GroupNorm's small constant in the denominator
__C.GROUP_NORM.EPSILON = 1e-5


# ---------------------------------------------------------------------------- #
# Misc options
# (these are set directly on the root config node rather than on a sub-node)
# ---------------------------------------------------------------------------- #

# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1

# Use NCCL for all reduce, otherwise use muji
# Warning: if set to True, you may experience deadlocks
__C.USE_NCCL = False

# The mapping from image coordinates to feature map coordinates might cause
# some boxes that are distinct in image space to become identical in feature
# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
# for identifying duplicate boxes.
# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
__C.DEDUP_BOXES = 1 / 16.

# Clip bounding box transformation predictions to prevent np.exp from
# overflowing
# Heuristic choice based on the value that would scale a 16 pixel anchor up to
# 1000 pixels (log(1000 / 16) is approximately 4.135)
__C.BBOX_XFORM_CLIP = np.log(1000. / 16.)

# Pixel mean values (BGR order) as a (1, 1, 3) array
# We use the same pixel mean for all networks even though it's not exactly what
# they were trained with
# "Fun" fact: the history of where these values come from is lost
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])

# For reproducibility...but not really because modern fast GPU libraries use
# non-deterministic op implementations
__C.RNG_SEED = 3

# A small number that's used many times
__C.EPS = 1e-14

# Root directory of project
# Note: this default is the current working directory at the time this module
# is imported, not the location of this file
__C.ROOT_DIR = os.getcwd()

# Output basedir
__C.OUTPUT_DIR = '/tmp'

# Name (or path to) the matlab executable
__C.MATLAB = 'matlab'

# Reduce memory usage with memonger gradient blob sharing
__C.MEMONGER = True

# Further reduce memory by allowing forward pass activations to be shared when
# possible. Note that this will cause activation blob inspection (values,
# shapes, etc.) to be meaningless when activation blobs are reused.
__C.MEMONGER_SHARE_ACTIVATIONS = False

# Dump detection visualizations
__C.VIS = False

# Score threshold for visualization
__C.VIS_TH = 0.9

# Expected results should take the form of a list of expectations, each
# specified by four elements (dataset, task, metric, expected value). For
# example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387]]
__C.EXPECTED_RESULTS = []
# Relative and absolute tolerance to use when comparing to EXPECTED_RESULTS
__C.EXPECTED_RESULTS_RTOL = 0.1
__C.EXPECTED_RESULTS_ATOL = 0.005
# When the expected value specifies a mean and standard deviation, we check
# that the actual value is within mean +/- SIGMA_TOL * std
__C.EXPECTED_RESULTS_SIGMA_TOL = 4
# Set to send email in case of an EXPECTED_RESULTS failure
__C.EXPECTED_RESULTS_EMAIL = ''

# Models and proposals referred to by URL are downloaded to a local cache
# specified by DOWNLOAD_CACHE
__C.DOWNLOAD_CACHE = '/tmp/detectron-download-cache'


# ---------------------------------------------------------------------------- #
# Cluster options
# ---------------------------------------------------------------------------- #
# Container for the cluster-related options
__C.CLUSTER = AttrDict()

# Flag to indicate if the code is running in a cluster environment
__C.CLUSTER.ON_CLUSTER = False


# ---------------------------------------------------------------------------- #
# Deprecated options
# If an option is removed from the code and you don't want to break existing
# yaml configs, you can add the full config key as a string to the set below.
# ---------------------------------------------------------------------------- #
_DEPRECATED_KEYS = set(
    {
        'FINAL_MSG',
        'MODEL.DILATION',
        'ROOT_GPU_ID',
        'RPN.ON',
        'TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED',
        'TRAIN.DROPOUT',
        'USE_GPU_NMS',
        'TEST.NUM_TEST_IMAGES',
    }
)


# ---------------------------------------------------------------------------- #
# Renamed options
# If you rename a config option, record the mapping from the old name to the new
# name in the dictionary below. Optionally, if the type also changed, you can
# make the value a tuple that specifies first the renamed key and then
# instructions for how to edit the config file.
# ---------------------------------------------------------------------------- #
_RENAMED_KEYS = {
    'EXAMPLE.RENAMED.KEY': 'EXAMPLE.KEY',  # Dummy example to follow
    'MODEL.PS_GRID_SIZE': 'RFCN.PS_GRID_SIZE',
    'MODEL.ROI_HEAD': 'FAST_RCNN.ROI_BOX_HEAD',
    'MRCNN.MASK_HEAD_NAME': 'MRCNN.ROI_MASK_HEAD',
    'TRAIN.DATASET': (
        'TRAIN.DATASETS',
        "Also convert to a tuple, e.g., " +
        "'coco_2014_train' -> ('coco_2014_train',) or " +
        "'coco_2014_train:coco_2014_valminusminival' -> " +
        "('coco_2014_train', 'coco_2014_valminusminival')"
    ),
    'TRAIN.PROPOSAL_FILE': (
        'TRAIN.PROPOSAL_FILES',
        "Also convert to a tuple, e.g., " +
        "'path/to/file' -> ('path/to/file',) or " +
        "'path/to/file1:path/to/file2' -> " +
        "('path/to/file1', 'path/to/file2')"
    ),
    'TEST.SCALES': (
        'TEST.SCALE',
        "Also convert from a tuple, e.g. (600, ), " +
        "to a integer, e.g. 600."
    ),
    'TEST.DATASET': (
        'TEST.DATASETS',
        "Also convert from a string, e.g 'coco_2014_minival', " +
        "to a tuple, e.g. ('coco_2014_minival', )."
    ),
    'TEST.PROPOSAL_FILE': (
        'TEST.PROPOSAL_FILES',
        "Also convert from a string, e.g. '/path/to/props.pkl', " +
        "to a tuple, e.g. ('/path/to/props.pkl', )."
    ),
}


# ---------------------------------------------------------------------------- #
# Renamed modules
# If a module containing a data structure used in the config (e.g. AttrDict)
# is renamed/moved and you don't want to break loading of existing yaml configs
# (e.g. from weights files) you can specify the renamed module below.
# ---------------------------------------------------------------------------- #
# Maps an old module path to its new module path. load_cfg() prefixes both
# sides with 'new:' and substitutes them in the raw yaml text before parsing.
_RENAMED_MODULES = {
    'utils.collections': 'detectron.utils.collections',
}


def assert_and_infer_cfg(cache_urls=True, make_immutable=True):
    """Finalize the global cfg once every override has been applied.

    Invoke this from your script after all necessary cfg values are set (for
    example after merging a config file and command line options). Two options
    are inferred from the others:

      - RPN.RPN_ON is forced to True when MODEL.RPN_ONLY or MODEL.FASTER_RCNN
        is set;
      - TEST.PRECOMPUTED_PROPOSALS is forced to False when RPN.RPN_ON or
        RETINANET.RETINANET_ON is set.

    If `cache_urls` is true, URLs in the config are cached locally via
    cache_cfg_urls(). If `make_immutable` is true (the default), the global cfg
    is marked immutable afterwards so that it cannot be changed during script
    execution, which avoids hard to debug errors.
    """
    needs_rpn = __C.MODEL.RPN_ONLY or __C.MODEL.FASTER_RCNN
    if needs_rpn:
        __C.RPN.RPN_ON = True

    # Must be read after the assignment above, since RPN_ON may just have been
    # switched on
    rpn_or_retinanet = __C.RPN.RPN_ON or __C.RETINANET.RETINANET_ON
    if rpn_or_retinanet:
        __C.TEST.PRECOMPUTED_PROPOSALS = False

    if cache_urls:
        cache_cfg_urls()

    if make_immutable:
        cfg.immutable(True)


def cache_cfg_urls():
    """Replace URLs in the global cfg with paths to locally cached copies.

    TRAIN.WEIGHTS, TEST.WEIGHTS and every entry of TRAIN.PROPOSAL_FILES and
    TEST.PROPOSAL_FILES is passed through cache_url() together with
    DOWNLOAD_CACHE, and the cfg is rewritten with the returned values.
    """
    sections = (__C.TRAIN, __C.TEST)
    # Weights first (train, then test), proposal files afterwards
    for section in sections:
        section.WEIGHTS = cache_url(section.WEIGHTS, __C.DOWNLOAD_CACHE)
    for section in sections:
        cached_files = [
            cache_url(proposal_file, __C.DOWNLOAD_CACHE)
            for proposal_file in section.PROPOSAL_FILES
        ]
        section.PROPOSAL_FILES = tuple(cached_files)


def get_output_dir(datasets, training=True):
    """Get the output directory determined by the current global config.

    The directory has the form
    <OUTPUT_DIR>/<train|test>/<dataset-name>/<MODEL.TYPE> and is created if it
    does not exist yet.

    Arguments:
        datasets (str, tuple or list): a single dataset name, or a sequence of
            dataset names which are joined with ':' to form <dataset-name>
        training (bool): selects the 'train' (True) or 'test' (False) tag

    Returns:
        outdir (str): path of the output directory
    """
    assert isinstance(datasets, tuple([tuple, list] + list(six.string_types))), \
        'datasets argument must be of type tuple, list or string'
    is_string = isinstance(datasets, six.string_types)
    dataset_name = datasets if is_string else ':'.join(datasets)
    tag = 'train' if training else 'test'
    # <output-dir>/<train|test>/<dataset-name>/<model-type>/
    outdir = osp.join(__C.OUTPUT_DIR, tag, dataset_name, __C.MODEL.TYPE)
    if not osp.exists(outdir):
        try:
            os.makedirs(outdir)
        except OSError:
            # Another process may have created the directory between the
            # exists() check and makedirs(); only re-raise when the directory
            # really is missing.
            if not osp.isdir(outdir):
                raise
    return outdir


def load_cfg(cfg_to_load):
    """Parse a yaml config while keeping old configs loadable.

    `cfg_to_load` is either the yaml text itself or an open file object whose
    content is read first. Module paths listed in _RENAMED_MODULES are
    rewritten in the text before it is handed to the yaml loader.
    """
    if six.PY2:
        file_types = [file, io.IOBase]  # noqa false positive
    else:
        file_types = [io.IOBase]
    expected_types = tuple(file_types + list(six.string_types))
    assert isinstance(cfg_to_load, expected_types), \
        'Expected one of {}, got {}'.format(expected_types, type(cfg_to_load))

    # File-like input: slurp the content into a single string
    if isinstance(cfg_to_load, tuple(file_types)):
        cfg_to_load = ''.join(cfg_to_load.readlines())

    # yaml object encoding: !!python/object/new:<module>.<object>
    for old_module, new_module in iteritems(_RENAMED_MODULES):
        cfg_to_load = cfg_to_load.replace(
            'new:' + old_module, 'new:' + new_module
        )

    # Import inline due to a circular dependency between env.py and config.py
    import detectron.utils.env as envu
    return envu.yaml_load(cfg_to_load)


def merge_cfg_from_file(cfg_filename):
    """Read the yaml config at `cfg_filename` and merge it into the global
    config.
    """
    with open(cfg_filename, 'r') as cfg_file:
        loaded = load_cfg(cfg_file)
    _merge_a_into_b(AttrDict(loaded), __C)


def merge_cfg_from_cfg(cfg_other):
    """Merge `cfg_other` into the global config.

    `cfg_other` must be an AttrDict (this is asserted by _merge_a_into_b); the
    options it specifies clobber the corresponding options in the global
    config.
    """
    _merge_a_into_b(cfg_other, __C)


def merge_cfg_from_list(cfg_list):
    """Merge a flat list of alternating keys and values (e.g., from the command
    line) into the global config. For example, `cfg_list = ['TEST.NMS', 0.5]`.

    Deprecated keys are skipped, renamed keys raise an error, and every other
    key must already exist in the global config. Values are decoded and type
    checked against the existing value before being stored.
    """
    assert len(cfg_list) % 2 == 0
    keys = cfg_list[0::2]
    raw_values = cfg_list[1::2]
    for full_key, raw_value in zip(keys, raw_values):
        if _key_is_deprecated(full_key):
            continue
        if _key_is_renamed(full_key):
            _raise_key_rename_error(full_key)

        # Walk down to the dict that holds the final key component
        path = full_key.split('.')
        parents, leaf = path[:-1], path[-1]
        node = __C
        for name in parents:
            assert name in node, 'Non-existent key: {}'.format(full_key)
            node = node[name]
        assert leaf in node, 'Non-existent key: {}'.format(full_key)

        node[leaf] = _check_and_coerce_cfg_value_type(
            _decode_cfg_value(raw_value), node[leaf], leaf, full_key
        )


def _merge_a_into_b(a, b, stack=None):
    """Recursively merge config dictionary a into config dictionary b; options
    in b are overwritten whenever a specifies them too.

    `stack` holds the keys leading to the current nesting level (None at the
    top level) and is only used to build full key names for messages and for
    the deprecated/renamed key lookups.
    """
    assert isinstance(a, AttrDict), \
        '`a` (cur type {}) must be an instance of {}'.format(type(a), AttrDict)
    assert isinstance(b, AttrDict), \
        '`b` (cur type {}) must be an instance of {}'.format(type(b), AttrDict)

    for key, raw_value in a.items():
        if stack is None:
            full_key = key
        else:
            full_key = '.'.join(stack) + '.' + key

        # Every key of a has to exist in b, unless it is deprecated (skipped)
        # or renamed (reported as an error)
        if key not in b:
            if _key_is_deprecated(full_key):
                continue
            if _key_is_renamed(full_key):
                _raise_key_rename_error(full_key)
            raise KeyError('Non-existent config key: {}'.format(full_key))

        # Work on a copy so that a is never modified through b
        value = _check_and_coerce_cfg_value_type(
            _decode_cfg_value(copy.deepcopy(raw_value)), b[key], key, full_key
        )

        if not isinstance(value, AttrDict):
            b[key] = value
            continue

        # Nested dict: descend one level
        child_stack = [key] if stack is None else stack + [key]
        _merge_a_into_b(value, b[key], stack=child_stack)


def _key_is_deprecated(full_key):
    """Return True if `full_key` is listed in _DEPRECATED_KEYS.

    A warning is logged for deprecated keys so that users notice that the
    option is being ignored.
    """
    if full_key in _DEPRECATED_KEYS:
        # Logger.warn is a deprecated alias of Logger.warning
        logger.warning(
            'Deprecated config key (ignoring): {}'.format(full_key)
        )
        return True
    return False


def _key_is_renamed(full_key):
    """Return True if `full_key` is listed as an old name in _RENAMED_KEYS."""
    return full_key in _RENAMED_KEYS


def _raise_key_rename_error(full_key):
    """Raise a KeyError telling the user which key replaces `full_key`.

    The _RENAMED_KEYS entry is either the new key itself or a
    (new key, note) tuple; in the latter case the note is appended to the
    error message.
    """
    entry = _RENAMED_KEYS[full_key]
    if isinstance(entry, tuple):
        new_key, note = entry[0], ' Note: ' + entry[1]
    else:
        new_key, note = entry, ''
    message = 'Key {} was renamed to {}; please update your config.{}'.format(
        full_key, new_key, note
    )
    raise KeyError(message)


def _decode_cfg_value(v):
    """Turn a raw config value (e.g., from a yaml config file or a command
    line argument) into a Python object.

    Dicts become AttrDicts, non-string values are returned unchanged, and
    strings are evaluated as Python literals when possible.
    """
    # Dictionaries coming out of the raw yaml need to be AttrDict objects
    if isinstance(v, dict):
        return AttrDict(v)

    if isinstance(v, six.string_types):
        # Try to interpret `v` as a:
        #   string, number, tuple, list, dict, boolean, or None
        #
        # `v` is always a string at this point, but it may *represent* either
        # a data structure (e.g. a list) or a plain string. In the latter case
        # the yaml parser hands us foo *without quotes*, which literal_eval
        # rejects with a ValueError; path-like values such as foo/bar are
        # rejected with a SyntaxError. Both simply mean "keep the string".
        try:
            return literal_eval(v)
        except (ValueError, SyntaxError):
            pass

    return v


def _check_and_coerce_cfg_value_type(value_a, value_b, key, full_key):
    """Checks that `value_a`, which is intended to replace `value_b` is of the
    right type. The type is correct if it matches exactly or is one of a few
    cases in which the type can be easily coerced.
    """
    # The types must match (with some exceptions)
    type_b = type(value_b)
    type_a = type(value_a)
    if type_a is type_b:
        return value_a

    # Exceptions: numpy arrays, strings, tuple<->list
    if isinstance(value_b, np.ndarray):
        value_a = np.array(value_a, dtype=value_b.dtype)
    elif isinstance(value_b, six.string_types):
        value_a = str(value_a)
    elif isinstance(value_a, tuple) and isinstance(value_b, list):
        value_a = list(value_a)
    elif isinstance(value_a, list) and isinstance(value_b, tuple):
        value_a = tuple(value_a)
    else:
        raise ValueError(
            'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
            'key: {}'.format(type_b, type_a, value_b, value_a, full_key)
        )
    return value_a


================================================
FILE: detectron/core/rpn_generator.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Functions for RPN proposal generation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import datetime
import logging
import numpy as np
import os

from caffe2.python import core
from caffe2.python import workspace

from detectron.core.config import cfg
from detectron.datasets import task_evaluation
from detectron.datasets.json_dataset import JsonDataset
from detectron.modeling import model_builder
from detectron.utils.io import save_object
from detectron.utils.timer import Timer
import detectron.utils.blob as blob_utils
import detectron.utils.c2 as c2_utils
import detectron.utils.env as envu
import detectron.utils.net as nu
import detectron.utils.subprocess as subprocess_utils

logger = logging.getLogger(__name__)


def generate_rpn_on_dataset(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    multi_gpu=False,
    gpu_id=0
):
    """Generate RPN proposals for a dataset and evaluate them.

    With `multi_gpu` set, the work is handed to
    multi_gpu_generate_rpn_on_dataset; otherwise the full dataset is processed
    by generate_rpn_on_range on GPU `gpu_id`. The written proposal file is
    then evaluated and the evaluation results are returned.
    """
    dataset = JsonDataset(dataset_name)
    timer = Timer()
    timer.tic()
    if not multi_gpu:
        # No index range is given, so the entire dataset is processed
        _, _, _, rpn_file = generate_rpn_on_range(
            weights_file,
            dataset_name,
            _proposal_file_ignored,
            output_dir,
            gpu_id=gpu_id
        )
    else:
        num_images = len(dataset.get_roidb())
        _, _, _, rpn_file = multi_gpu_generate_rpn_on_dataset(
            weights_file,
            dataset_name,
            _proposal_file_ignored,
            num_images,
            output_dir
        )
    timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(timer.average_time))
    return evaluate_proposal_file(dataset, rpn_file, output_dir)


def multi_gpu_generate_rpn_on_dataset(
    weights_file, dataset_name, _proposal_file_ignored, num_images, output_dir
):
    """Generate RPN proposals for a dataset using parallel subprocesses.

    The test_net binary is run in parallel via
    subprocess_utils.process_in_parallel, the per-subprocess results are
    concatenated, and the combined proposals (plus the cfg as yaml) are saved
    to <output_dir>/rpn_proposals.pkl.

    Returns:
        (boxes, scores, ids, rpn_file): the collated results and the path of
        the written proposal file
    """
    # Locate the test_net binary
    binary = os.path.join(
        envu.get_runtime_dir(), 'test_net' + envu.get_py_bin_ext()
    )
    assert os.path.exists(binary), "Binary '{}' not found".format(binary)

    # The target dataset and the weights are passed via the command line
    opts = [
        'TEST.DATASETS', '("{}",)'.format(dataset_name),
        'TEST.WEIGHTS', weights_file,
    ]

    # Run inference in parallel in subprocesses
    outputs = subprocess_utils.process_in_parallel(
        'rpn_proposals', num_images, binary, output_dir, opts
    )

    # Concatenate the results of all subprocesses
    boxes, scores, ids = [], [], []
    for part in outputs:
        boxes.extend(part['boxes'])
        scores.extend(part['scores'])
        ids.extend(part['ids'])

    rpn_file = os.path.join(output_dir, 'rpn_proposals.pkl')
    payload = dict(
        boxes=boxes, scores=scores, ids=ids, cfg=envu.yaml_dump(cfg)
    )
    save_object(payload, rpn_file)
    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))
    return boxes, scores, ids, rpn_file


def generate_rpn_on_range(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Generate RPN proposals with a single GPU, either for every image of a
    dataset or only for the images in the index range `ind_range`.

    The proposals (plus the cfg as yaml) are saved in `output_dir` as
    rpn_proposals.pkl, or as rpn_proposals_range_<start>_<end>.pkl when an
    index range is given.

    Returns:
        (boxes, scores, ids, rpn_file): the generated results and the path of
        the written proposal file
    """
    assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN

    roidb, start_ind, end_ind, total_num_images = get_roidb(
        dataset_name, ind_range
    )
    logger.info(
        'Output will be saved to: {:s}'.format(os.path.abspath(output_dir))
    )

    # Build the inference model and load its weights onto the chosen GPU
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=gpu_id)
    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)

    boxes, scores, ids = generate_proposals_on_roidb(
        model,
        roidb,
        start_ind=start_ind,
        end_ind=end_ind,
        total_num_images=total_num_images,
        gpu_id=gpu_id,
    )

    cfg_yaml = envu.yaml_dump(cfg)
    if ind_range is None:
        rpn_name = 'rpn_proposals.pkl'
    else:
        rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range)
    rpn_file = os.path.join(output_dir, rpn_name)
    payload = dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml)
    save_object(payload, rpn_file)
    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))
    return boxes, scores, ids, rpn_file


def generate_proposals_on_roidb(
    model, roidb, start_ind=None, end_ind=None, total_num_images=None,
    gpu_id=0,
):
    """Generate RPN proposals on all images in a roidb.

    Arguments:
        model: the model passed on to im_proposals
        roidb (list): entries providing at least the keys 'id' and 'image'
            (the path handed to cv2.imread)
        start_ind, end_ind, total_num_images: only used for the progress log
            messages; when start_ind is None they default to 0, len(roidb)
            and len(roidb)
        gpu_id (int): GPU on which the proposals are computed

    Returns:
        (roidb_boxes, roidb_scores, roidb_ids): three lists with one element
        per roidb entry, in roidb order

    Raises:
        IOError: if an image cannot be read
    """
    _t = Timer()
    num_images = len(roidb)
    roidb_boxes = [[] for _ in range(num_images)]
    roidb_scores = [[] for _ in range(num_images)]
    roidb_ids = [[] for _ in range(num_images)]
    if start_ind is None:
        start_ind = 0
        end_ind = num_images
        total_num_images = num_images
    for i, entry in enumerate(roidb):
        roidb_ids[i] = entry['id']
        im = cv2.imread(entry['image'])
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside the
        # blob preparation code.
        if im is None:
            raise IOError(
                'Failed to read image \'{}\''.format(entry['image'])
            )
        with c2_utils.NamedCudaScope(gpu_id):
            _t.tic()
            roidb_boxes[i], roidb_scores[i] = im_proposals(model, im)
            _t.toc()
        # Report progress every 10 images
        if i % 10 == 0:
            ave_time = _t.average_time
            eta_seconds = ave_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                (
                    'rpn_generate: range [{:d}, {:d}] of {:d}: '
                    '{:d}/{:d} {:.3f}s (eta: {})'
                ).format(
                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                    start_ind + num_images, ave_time, eta
                )
            )

    return roidb_boxes, roidb_scores, roidb_ids


def im_proposals(model, im):
    """Run the RPN on one image and return its proposals.

    Returns:
        (boxes, scores): the proposal boxes, with the batch index column
        removed and scaled back to the original image, and their scores
    """
    im_blob, im_scale, im_info = blob_utils.get_image_blob(
        im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE
    )
    for blob_name, blob in (('data', im_blob), ('im_info', im_info)):
        workspace.FeedBlob(
            core.ScopedName(blob_name), blob.astype(np.float32, copy=False)
        )
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        levels = list(
            range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1)
        )
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(lvl)) for lvl in levels
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(lvl)) for lvl in levels
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # The first half of the fetched blobs holds the rois, the second half
        # the scores; merge the predictions of all levels
        num_levels = len(rois_names)
        boxes = np.concatenate(blobs[:num_levels])
        scores = np.concatenate(blobs[num_levels:]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        # Keep only the top scoring proposals
        keep = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[keep]
        boxes = boxes[keep, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'), core.ScopedName('rpn_roi_probs')]
        )
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding;
    # drop it to keep only the boxes, then scale them back to the original
    # input image scale
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores


def get_roidb(dataset_name, ind_range):
    """Load the roidb of dataset `dataset_name`, optionally restricted to the
    index range `ind_range` (a pair of integers, or None for everything).

    Returns:
        (roidb, start, end, total_num_images): the (possibly sliced) roidb,
        the range it covers, and the size of the full roidb
    """
    roidb = JsonDataset(dataset_name).get_roidb()
    total_num_images = len(roidb)

    if ind_range is None:
        return roidb, 0, total_num_images, total_num_images

    start, end = ind_range
    return roidb[start:end], start, end, total_num_images


def evaluate_proposal_file(dataset, proposal_file, output_dir):
    """Evaluate the box proposals stored in `proposal_file`.

    The results are logged, saved to <output_dir>/rpn_proposal_recall.pkl and
    returned.
    """
    roidb_with_proposals = dataset.get_roidb(
        gt=True, proposal_file=proposal_file
    )
    results = task_evaluation.evaluate_box_proposals(
        dataset, roidb_with_proposals
    )
    task_evaluation.log_box_proposal_results(results)
    save_object(results, os.path.join(output_dir, 'rpn_proposal_recall.pkl'))
    return results


================================================
FILE: detectron/core/test.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Inference functionality for most Detectron models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import defaultdict
import cv2
import logging
import numpy as np

from caffe2.python import core
from caffe2.python import workspace
import pycocotools.mask as mask_util

from detectron.core.config import cfg
from detectron.utils.timer import Timer
import detectron.core.test_retinanet as test_retinanet
import detectron.modeling.FPN as fpn
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils
import detectron.utils.image as image_utils
import detectron.utils.keypoints as keypoint_utils

logger = logging.getLogger(__name__)


def im_detect_all(model, im, box_proposals, timers=None):
    """Run box detection and, if enabled, mask and keypoint prediction on one
    image.

    Arguments:
        model: the detection model to use
        im: the image to test
        box_proposals: box proposals passed on to the box detection function
        timers: optional mapping from timer name to Timer; a fresh
            defaultdict(Timer) is used when omitted

    Returns:
        (cls_boxes, cls_segms, cls_keyps). RetinaNet models return
        (cls_boxes, None, None). cls_segms / cls_keyps are None unless the
        corresponding cfg.MODEL.MASK_ON / cfg.MODEL.KEYPOINTS_ON option is set
        and at least one box is left after NMS.
    """
    if timers is None:
        timers = defaultdict(Timer)

    # Handle RetinaNet testing separately for now
    if cfg.RETINANET.RETINANET_ON:
        retinanet_boxes = test_retinanet.im_detect_bbox(model, im, timers)
        return retinanet_boxes, None, None

    timers['im_detect_bbox'].tic()
    if not cfg.TEST.BBOX_AUG.ENABLED:
        scores, boxes, im_scale = im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
        )
    else:
        scores, boxes, im_scale = im_detect_bbox_aug(model, im, box_proposals)
    timers['im_detect_bbox'].toc()

    # After this step, scores and boxes cover the whole image (score
    # thresholding and nms applied, not separated by class), while cls_boxes
    # holds boxes and scores separated by class in the format used for
    # evaluating results
    timers['misc_bbox'].tic()
    scores, boxes, cls_boxes = box_results_with_nms_and_limit(scores, boxes)
    timers['misc_bbox'].toc()

    cls_segms = None
    if cfg.MODEL.MASK_ON and boxes.shape[0] > 0:
        timers['im_detect_mask'].tic()
        if not cfg.TEST.MASK_AUG.ENABLED:
            masks = im_detect_mask(model, im_scale, boxes)
        else:
            masks = im_detect_mask_aug(model, im, boxes)
        timers['im_detect_mask'].toc()

        timers['misc_mask'].tic()
        im_height, im_width = im.shape[0], im.shape[1]
        cls_segms = segm_results(cls_boxes, masks, boxes, im_height, im_width)
        timers['misc_mask'].toc()

    cls_keyps = None
    if cfg.MODEL.KEYPOINTS_ON and boxes.shape[0] > 0:
        timers['im_detect_keypoints'].tic()
        if not cfg.TEST.KPS_AUG.ENABLED:
            heatmaps = im_detect_keypoints(model, im_scale, boxes)
        else:
            heatmaps = im_detect_keypoints_aug(model, im, boxes)
        timers['im_detect_keypoints'].toc()

        timers['misc_keypoints'].tic()
        cls_keyps = keypoint_results(cls_boxes, heatmaps, boxes)
        timers['misc_keypoints'].toc()

    return cls_boxes, cls_segms, cls_keyps


def im_conv_body_only(model, im, target_scale, target_max_size):
    """Feed image `im` to the 'data' blob, run `model.conv_body_net` and return
    the image scale reported by blob_utils.get_image_blob.
    """
    im_blob, im_scale, _ = blob_utils.get_image_blob(
        im, target_scale, target_max_size
    )
    data_blob_name = core.ScopedName('data')
    workspace.FeedBlob(data_blob_name, im_blob)
    net_name = model.conv_body_net.Proto().name
    workspace.RunNet(net_name)
    return im_scale


def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        target_scale: target image scale, passed through to _get_blobs
        target_max_size: maximum image size, passed through to _get_blobs
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale: image scale used in the input blob (as returned by
            _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Collapse each (scaled, rounded) roi row into a single number by a dot
        # product with widely spaced weights, so that np.unique can find
        # duplicate rows
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        # index selects one representative per unique roi; inv_index maps every
        # original roi to its representative (used at the end of the function)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    # Feed all input blobs, then run the network
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        # With Faster R-CNN the boxes are taken from the network's 'rois' blob
        # rather than from the `boxes` argument
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Repeat the class agnostic boxes once for each class
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale


def im_detect_bbox_aug(model, im, box_proposals=None):
    """Runs bbox detection under every enabled test-time augmentation and
    merges the per-transform predictions.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): image to test
        box_proposals (ndarray or None): proposals forwarded unchanged to each
            per-transform detector

    Returns:
        (scores, boxes, im_scale): combined scores and boxes (combined
            according to cfg.TEST.BBOX_AUG.SCORE_HEUR / COORD_HEUR) and the
            image scale reported by im_detect_bbox for the untransformed image
    """
    aug = cfg.TEST.BBOX_AUG
    assert not aug.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'
    assert aug.SCORE_HEUR != 'UNION' or aug.COORD_HEUR == 'UNION', \
        'Coord heuristic must be union whenever score heuristic is union'
    assert aug.COORD_HEUR != 'UNION' or aug.SCORE_HEUR == 'UNION', \
        'Score heuristic must be union whenever coord heuristic is union'
    assert not cfg.MODEL.FASTER_RCNN or aug.SCORE_HEUR == 'UNION', \
        'Union heuristic must be used to combine Faster RCNN predictions'

    # One entry per transformation, kept in lockstep
    all_scores = []
    all_boxes = []

    # Horizontal flip at the default test scale
    if aug.H_FLIP:
        flip_scores, flip_boxes, _ = im_detect_bbox_hflip(
            model,
            im,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            box_proposals=box_proposals
        )
        all_scores.append(flip_scores)
        all_boxes.append(flip_boxes)

    # Extra scales, each optionally followed by its flipped variant
    for scale in aug.SCALES:
        max_size = aug.MAX_SIZE
        flip_modes = (False, True) if aug.SCALE_H_FLIP else (False,)
        for flip in flip_modes:
            scl_scores, scl_boxes = im_detect_bbox_scale(
                model, im, scale, max_size, box_proposals, hflip=flip
            )
            all_scores.append(scl_scores)
            all_boxes.append(scl_boxes)

    # Extra aspect ratios, each optionally followed by its flipped variant
    for aspect_ratio in aug.ASPECT_RATIOS:
        flip_modes = (False, True) if aug.ASPECT_RATIO_H_FLIP else (False,)
        for flip in flip_modes:
            ar_scores, ar_boxes = im_detect_bbox_aspect_ratio(
                model, im, aspect_ratio, box_proposals, hflip=flip
            )
            all_scores.append(ar_scores)
            all_boxes.append(ar_boxes)

    # The identity transform must run last so that, on return, the Caffe2
    # workspace holds the blobs of the original image (the postcondition of
    # im_detect_bbox that later stages rely on)
    scores_i, boxes_i, im_scale_i = im_detect_bbox(
        model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
    )
    all_scores.append(scores_i)
    all_boxes.append(boxes_i)

    # Merge the scores
    score_heur = aug.SCORE_HEUR
    if score_heur == 'ID':
        scores_c = scores_i
    elif score_heur == 'AVG':
        scores_c = np.mean(all_scores, axis=0)
    elif score_heur == 'UNION':
        scores_c = np.vstack(all_scores)
    else:
        raise NotImplementedError(
            'Score heur {} not supported'.format(score_heur)
        )

    # Merge the boxes
    coord_heur = aug.COORD_HEUR
    if coord_heur == 'ID':
        boxes_c = boxes_i
    elif coord_heur == 'AVG':
        boxes_c = np.mean(all_boxes, axis=0)
    elif coord_heur == 'UNION':
        boxes_c = np.vstack(all_boxes)
    else:
        raise NotImplementedError(
            'Coord heur {} not supported'.format(coord_heur)
        )

    return scores_c, boxes_c, im_scale_i


def im_detect_bbox_hflip(
    model, im, target_scale, target_max_size, box_proposals=None
):
    """Runs bbox detection on the horizontally mirrored image and maps the
    detected boxes back into the coordinate frame of the input image.

    Returns the same (scores, boxes, im_scale) triple as im_detect_bbox.
    """
    width = im.shape[1]
    mirrored_im = im[:, ::-1, :]

    # Faster R-CNN models are run without external proposals; otherwise the
    # proposals have to be mirrored together with the image
    if cfg.MODEL.FASTER_RCNN:
        mirrored_proposals = None
    else:
        mirrored_proposals = box_utils.flip_boxes(box_proposals, width)

    scores, mirrored_boxes, im_scale = im_detect_bbox(
        model,
        mirrored_im,
        target_scale,
        target_max_size,
        boxes=mirrored_proposals
    )

    # Undo the flip on the predicted boxes
    return scores, box_utils.flip_boxes(mirrored_boxes, width), im_scale


def im_detect_bbox_scale(
    model, im, target_scale, target_max_size, box_proposals=None, hflip=False
):
    """Computes bbox detections at the given scale, optionally on the
    horizontally flipped image.

    Returns (scores, boxes) with predictions in the original image space; the
    image scale reported by the underlying detector is dropped.
    """
    if not hflip:
        scores, boxes, _ = im_detect_bbox(
            model, im, target_scale, target_max_size, boxes=box_proposals
        )
        return scores, boxes

    scores, boxes, _ = im_detect_bbox_hflip(
        model, im, target_scale, target_max_size, box_proposals=box_proposals
    )
    return scores, boxes


def im_detect_bbox_aspect_ratio(
    model, im, aspect_ratio, box_proposals=None, hflip=False
):
    """Computes bbox detections at the given width-relative aspect ratio,
    optionally combined with a horizontal flip.

    Returns (scores, boxes) with predictions in the original image space.
    """
    # Warp the image (and, when proposals are used, the proposals with it)
    warped_im = image_utils.aspect_ratio_rel(im, aspect_ratio)
    if cfg.MODEL.FASTER_RCNN:
        warped_proposals = None
    else:
        warped_proposals = box_utils.aspect_ratio(box_proposals, aspect_ratio)

    if hflip:
        scores, warped_boxes, _ = im_detect_bbox_hflip(
            model,
            warped_im,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            box_proposals=warped_proposals
        )
    else:
        scores, warped_boxes, _ = im_detect_bbox(
            model,
            warped_im,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            boxes=warped_proposals
        )

    # Apply the inverse aspect ratio to undo the warp on the detections
    return scores, box_utils.aspect_ratio(warped_boxes, 1.0 / aspect_ratio)


def im_detect_mask(model, im_scale, boxes):
    """Infer instance segmentation masks. This function must be called after
    im_detect_bbox as it assumes that the Caffe2 workspace is already populated
    with the necessary blobs.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scale: image blob scale as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)

    Returns:
        pred_masks (ndarray): R x K x M x M array of class specific soft masks
            output by the network (must be processed by segm_results to convert
            into hard masks in the original image coordinate space). K is
            cfg.MODEL.NUM_CLASSES when cfg.MRCNN.CLS_SPECIFIC_MASK is set and 1
            otherwise. The array is 4-D even when R == 0.
    """
    M = cfg.MRCNN.RESOLUTION
    num_mask_channels = (
        cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1
    )

    if boxes.shape[0] == 0:
        # Keep the rank identical to the non-empty case so that callers that
        # index four axes (e.g. the flip in im_detect_mask_hflip) do not fail
        # on an empty detection set
        return np.zeros((0, num_mask_channels, M, M), np.float32)

    inputs = {'mask_rois': _get_rois_blob(boxes, im_scale)}
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'mask_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.mask_net.Proto().name)

    # Fetch masks
    pred_masks = workspace.FetchBlob(
        core.ScopedName('mask_fcn_probs')
    ).squeeze()

    # The explicit reshape restores any axis removed by squeeze (e.g. R == 1)
    return pred_masks.reshape([-1, num_mask_channels, M, M])


def im_detect_mask_aug(model, im, boxes):
    """Predicts masks under every enabled test-time augmentation and merges
    them with the heuristic selected by cfg.TEST.MASK_AUG.HEUR.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): BGR image to test
        boxes (ndarray): R x 4 array of bounding boxes

    Returns:
        masks (ndarray): R x K x M x M array of class specific soft masks
    """
    aug = cfg.TEST.MASK_AUG
    assert not aug.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'

    # Identity transform: recompute the conv body, then read out the masks
    identity_scale = im_conv_body_only(
        model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE
    )
    masks_ts = [im_detect_mask(model, identity_scale, boxes)]

    # Horizontal flip at the default test scale
    if aug.H_FLIP:
        masks_ts.append(
            im_detect_mask_hflip(
                model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes
            )
        )

    # Extra scales, each optionally followed by its flipped variant
    for scale in aug.SCALES:
        max_size = aug.MAX_SIZE
        masks_ts.append(
            im_detect_mask_scale(model, im, scale, max_size, boxes)
        )
        if aug.SCALE_H_FLIP:
            masks_ts.append(
                im_detect_mask_scale(
                    model, im, scale, max_size, boxes, hflip=True
                )
            )

    # Extra aspect ratios, each optionally followed by its flipped variant
    for aspect_ratio in aug.ASPECT_RATIOS:
        masks_ts.append(
            im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes)
        )
        if aug.ASPECT_RATIO_H_FLIP:
            masks_ts.append(
                im_detect_mask_aspect_ratio(
                    model, im, aspect_ratio, boxes, hflip=True
                )
            )

    # Merge the soft masks
    heur = aug.HEUR
    if heur == 'SOFT_AVG':
        return np.mean(masks_ts, axis=0)
    if heur == 'SOFT_MAX':
        return np.amax(masks_ts, axis=0)
    if heur == 'LOGIT_AVG':
        # Average in logit space, then map back through a sigmoid. Only the
        # denominator is clamped, so probabilities at 0 stay finite.
        # NOTE(review): a probability of exactly 1 yields log(0) here --
        # confirm this saturation is acceptable.
        mean_logit = np.mean(
            [
                -1.0 * np.log((1.0 - m) / np.maximum(m, 1e-20))
                for m in masks_ts
            ],
            axis=0
        )
        return 1.0 / (1.0 + np.exp(-mean_logit))

    raise NotImplementedError(
        'Heuristic {} not supported'.format(heur)
    )


def im_detect_mask_hflip(model, im, target_scale, target_max_size, boxes):
    """Predicts masks on the horizontally mirrored image and mirrors the
    resulting soft masks back.

    The boxes are given in the coordinate frame of the unflipped image; they
    are flipped internally to match the mirrored image.
    """
    mirrored_boxes = box_utils.flip_boxes(boxes, im.shape[1])
    mirrored_im = im[:, ::-1, :]

    # Recompute the conv body on the mirrored image before reading out masks
    scale = im_conv_body_only(
        model, mirrored_im, target_scale, target_max_size
    )
    mirrored_masks = im_detect_mask(model, scale, mirrored_boxes)

    # Reverse the last (width) axis to undo the flip
    return mirrored_masks[:, :, :, ::-1]


def im_detect_mask_scale(
    model, im, target_scale, target_max_size, boxes, hflip=False
):
    """Computes masks at the given scale, optionally on the flipped image."""
    if hflip:
        return im_detect_mask_hflip(
            model, im, target_scale, target_max_size, boxes
        )

    # Recompute the conv body at the requested scale, then read out the masks
    scale = im_conv_body_only(model, im, target_scale, target_max_size)
    return im_detect_mask(model, scale, boxes)


def im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes, hflip=False):
    """Computes mask detections at the given width-relative aspect ratio,
    optionally combined with a horizontal flip.
    """
    # Warp both the image and the boxes by the same aspect ratio
    warped_im = image_utils.aspect_ratio_rel(im, aspect_ratio)
    warped_boxes = box_utils.aspect_ratio(boxes, aspect_ratio)

    if hflip:
        return im_detect_mask_hflip(
            model, warped_im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, warped_boxes
        )

    scale = im_conv_body_only(
        model, warped_im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE
    )
    return im_detect_mask(model, scale, warped_boxes)


def im_detect_keypoints(model, im_scale, boxes):
    """Infer instance keypoint poses. This function must be called after
    im_detect_bbox as it assumes that the Caffe2 workspace is already populated
    with the necessary blobs.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scale: image blob scale as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)

    Returns:
        pred_heatmaps (ndarray): R x J x M x M array of keypoint location
            logits (softmax inputs) for each of the J keypoint types output
            by the network (must be processed by keypoint_results to convert
            into point predictions in the original image coordinate space)
    """
    M = cfg.KRCNN.HEATMAP_SIZE
    if boxes.shape[0] == 0:
        pred_heatmaps = np.zeros((0, cfg.KRCNN.NUM_KEYPOINTS, M, M), np.float32)
        return pred_heatmaps

    inputs = {'keypoint_rois': _get_rois_blob(boxes, im_scale)}

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'keypoint_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.keypoint_net.Proto().name)

    pred_heatmaps = workspace.FetchBlob(core.ScopedName('kps_score'))

    # Normalize to exactly four axes by collapsing everything in front of the
    # trailing (J, M, M) axes. Unlike a blanket squeeze(), this keeps the
    # keypoint axis when J == 1 and the RoI axis when R == 1.
    return pred_heatmaps.reshape((-1,) + pred_heatmaps.shape[-3:])


def im_detect_keypoints_aug(model, im, boxes):
    """Predicts keypoint heatmaps under every enabled test-time augmentation
    and merges them with the heuristic selected by cfg.TEST.KPS_AUG.HEUR.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): BGR image to test
        boxes (ndarray): R x 4 array of bounding boxes

    Returns:
        heatmaps (ndarray): R x J x M x M array of keypoint location logits
    """
    aug = cfg.TEST.KPS_AUG

    # One (heatmaps, is_downscaled, is_upscaled) record per transformation
    records = []

    # Identity transform
    identity_scale = im_conv_body_only(
        model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE
    )
    records.append(
        (im_detect_keypoints(model, identity_scale, boxes), False, False)
    )

    # Horizontal flip at the default test scale
    if aug.H_FLIP:
        flipped = im_detect_keypoints_hflip(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes
        )
        records.append((flipped, False, False))

    # Extra scales; each is tagged relative to the default test scale so the
    # size-dependent combiner can filter on it
    for scale in aug.SCALES:
        is_down = scale < cfg.TEST.SCALE
        is_up = scale > cfg.TEST.SCALE
        scaled = im_detect_keypoints_scale(
            model, im, scale, aug.MAX_SIZE, boxes
        )
        records.append((scaled, is_down, is_up))

        if aug.SCALE_H_FLIP:
            scaled_flipped = im_detect_keypoints_scale(
                model, im, scale, aug.MAX_SIZE, boxes, hflip=True
            )
            records.append((scaled_flipped, is_down, is_up))

    # Extra aspect ratios (never tagged as down/up-scaling)
    for aspect_ratio in aug.ASPECT_RATIOS:
        warped = im_detect_keypoints_aspect_ratio(
            model, im, aspect_ratio, boxes
        )
        records.append((warped, False, False))

        if aug.ASPECT_RATIO_H_FLIP:
            warped_flipped = im_detect_keypoints_aspect_ratio(
                model, im, aspect_ratio, boxes, hflip=True
            )
            records.append((warped_flipped, False, False))

    # Pick the reduction applied across transformations
    if aug.HEUR == 'HM_AVG':
        reduce_fn = np.mean
    elif aug.HEUR == 'HM_MAX':
        reduce_fn = np.amax
    else:
        raise NotImplementedError(
            'Heuristic {} not supported'.format(aug.HEUR)
        )

    def combine(stack):
        return reduce_fn(stack, axis=0)

    heatmaps_ts = [rec[0] for rec in records]
    if aug.SCALE_SIZE_DEP:
        ds_ts = [rec[1] for rec in records]
        us_ts = [rec[2] for rec in records]
        return combine_heatmaps_size_dep(
            heatmaps_ts, ds_ts, us_ts, boxes, combine
        )
    return combine(heatmaps_ts)


def im_detect_keypoints_hflip(model, im, target_scale, target_max_size, boxes):
    """Computes keypoint heatmaps on the horizontally mirrored image and maps
    them back with keypoint_utils.flip_heatmaps.

    The boxes are given in the coordinate frame of the unflipped image; they
    are flipped internally to match the mirrored image.
    """
    mirrored_boxes = box_utils.flip_boxes(boxes, im.shape[1])
    mirrored_im = im[:, ::-1, :]

    # Recompute the conv body on the mirrored image before running the head
    scale = im_conv_body_only(
        model, mirrored_im, target_scale, target_max_size
    )
    mirrored_heatmaps = im_detect_keypoints(model, scale, mirrored_boxes)

    # Undo the flip on the predicted heatmaps
    return keypoint_utils.flip_heatmaps(mirrored_heatmaps)


def im_detect_keypoints_scale(
    model, im, target_scale, target_max_size, boxes, hflip=False
):
    """Computes keypoint predictions at the given scale, optionally on the
    horizontally flipped image.
    """
    if hflip:
        return im_detect_keypoints_hflip(
            model, im, target_scale, target_max_size, boxes
        )

    # Recompute the conv body at the requested scale, then run the head
    scale = im_conv_body_only(model, im, target_scale, target_max_size)
    return im_detect_keypoints(model, scale, boxes)


def im_detect_keypoints_aspect_ratio(
    model, im, aspect_ratio, boxes, hflip=False
):
    """Detects keypoints at the given width-relative aspect ratio, optionally
    combined with a horizontal flip.
    """
    # Perform keypoint detection on the transformed image and boxes
    warped_im = image_utils.aspect_ratio_rel(im, aspect_ratio)
    warped_boxes = box_utils.aspect_ratio(boxes, aspect_ratio)

    if hflip:
        return im_detect_keypoints_hflip(
            model, warped_im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, warped_boxes
        )

    scale = im_conv_body_only(
        model, warped_im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE
    )
    return im_detect_keypoints(model, scale, warped_boxes)


def combine_heatmaps_size_dep(hms_ts, ds_ts, us_ts, boxes, heur_f):
    """Combines heatmaps while taking object sizes into account.

    Arguments:
        hms_ts (list): one heatmap array per transformation, indexed by
            object along the first axis
        ds_ts (list): per-transformation flag, true for downscaling transforms
        us_ts (list): per-transformation flag, true for upscaling transforms
        boxes (ndarray): boxes whose areas decide the size class of each object
        heur_f (callable): reduces a list of per-object heatmaps to one

    Returns:
        hms_c (ndarray): combined heatmaps, same shape and dtype as hms_ts[0]
    """
    assert len(hms_ts) == len(ds_ts) == len(us_ts), \
        'All sets of hms must be tagged with downscaling and upscaling flags'

    # Split objects into small+medium vs. large using the area threshold
    areas = box_utils.boxes_area(boxes)
    is_small_or_medium = areas < cfg.TEST.KPS_AUG.AREA_TH
    is_large = areas >= cfg.TEST.KPS_AUG.AREA_TH

    combined = np.zeros_like(hms_ts[0])
    for obj in range(combined.shape[0]):
        # Downscaled predictions are dropped for small/medium objects and
        # upscaled predictions are dropped for large objects
        usable = [
            hms_t[obj]
            for hms_t, ds_t, us_t in zip(hms_ts, ds_ts, us_ts)
            if not (is_small_or_medium[obj] and ds_t)
            and not (is_large[obj] and us_t)
        ]
        combined[obj] = heur_f(usable)

    return combined


def box_results_with_nms_and_limit(scores, boxes):
    """Turns raw per-proposal predictions into final detections by score
    thresholding, (soft-)NMS, optional box voting and a per-image cap.

    `boxes` has shape (#detections, 4 * #classes): each row holds one
    predicted box per object class (background included), all originating
    from the same object proposal.

    `scores` has shape (#detections, #classes): each row holds one confidence
    per object class (background included). `scores[i, j]` belongs to the box
    at `boxes[i, j * 4:(j + 1) * 4]`.

    Returns:
        (scores, boxes, cls_boxes): the surviving scores and boxes stacked
            over all foreground classes, and the per-class list of
            [x1, y1, x2, y2, score] arrays (index 0, the background class, is
            left as an empty list)
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]

    # Class 0 is the background class and is never reported
    for cls_idx in range(1, num_classes):
        cls_scores = scores[:, cls_idx]
        selected = np.where(cls_scores > cfg.TEST.SCORE_THRESH)[0]
        candidates = np.hstack(
            (
                boxes[selected, cls_idx * 4:(cls_idx + 1) * 4],
                cls_scores[selected][:, np.newaxis]
            )
        ).astype(np.float32, copy=False)

        if not cfg.TEST.SOFT_NMS.ENABLED:
            kept = box_utils.nms(candidates, cfg.TEST.NMS)
            survivors = candidates[kept, :]
        else:
            survivors, _ = box_utils.soft_nms(
                candidates,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )

        # Optionally refine the post-NMS boxes with bounding-box voting over
        # all thresholded candidates of this class
        if cfg.TEST.BBOX_VOTE.ENABLED:
            survivors = box_utils.box_voting(
                survivors,
                candidates,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            )
        cls_boxes[cls_idx] = survivors

    # Cap the number of detections **over all classes** by keeping everything
    # that scores at least as high as the DETECTIONS_PER_IM-th best score
    max_dets = cfg.TEST.DETECTIONS_PER_IM
    if max_dets > 0:
        pooled_scores = np.hstack([dets[:, -1] for dets in cls_boxes[1:]])
        if len(pooled_scores) > max_dets:
            cutoff = np.sort(pooled_scores)[-max_dets]
            for cls_idx in range(1, num_classes):
                dets = cls_boxes[cls_idx]
                above = np.where(dets[:, -1] >= cutoff)[0]
                cls_boxes[cls_idx] = dets[above, :]

    stacked = np.vstack(cls_boxes[1:])
    return stacked[:, -1], stacked[:, :-1], cls_boxes


def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w):
    """Converts soft masks into binary, image-sized masks encoded as RLEs.

    Arguments:
        cls_boxes (list): per-class detection arrays; only the row count of
            each foreground entry is used
        masks (ndarray): soft masks indexed as [detection, channel, y, x],
            ordered class by class to match cls_boxes
        ref_boxes (ndarray): one reference box per row of `masks`
        im_h (int): image height
        im_w (int): image width

    Returns:
        cls_segms (list): per-class lists of RLE-encoded masks (index 0, the
            background class, is left as an empty list)
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_segms = [[] for _ in range(num_classes)]

    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), each mask is zero-padded by 1 pixel before
    # being resized to its box. This prevents "top hat" artifacts, and means
    # the reference boxes must be expanded by the matching factor.
    M = cfg.MRCNN.RESOLUTION
    ref_boxes = box_utils.expand_boxes(ref_boxes, (M + 2.0) / M)
    ref_boxes = ref_boxes.astype(np.int32)
    # Reused for every mask: only the interior is overwritten, so the
    # 1-pixel border stays zero
    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)

    mask_ind = 0
    # Class 0 is the background class and is skipped
    for j in range(1, num_classes):
        # Class-agnostic mask heads expose a single channel
        channel = j if cfg.MRCNN.CLS_SPECIFIC_MASK else 0
        segms = []
        for _ in range(cls_boxes[j].shape[0]):
            padded_mask[1:-1, 1:-1] = masks[mask_ind, channel, :, :]

            box = ref_boxes[mask_ind, :]
            w = np.maximum(box[2] - box[0] + 1, 1)
            h = np.maximum(box[3] - box[1] + 1, 1)

            resized = cv2.resize(padded_mask, (w, h))
            binary = np.array(
                resized > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8
            )

            # Paste the part of the box-sized mask that lies inside the image
            x_0 = max(box[0], 0)
            x_1 = min(box[2] + 1, im_w)
            y_0 = max(box[1], 0)
            y_1 = min(box[3] + 1, im_h)

            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
            im_mask[y_0:y_1, x_0:x_1] = binary[
                (y_0 - box[1]):(y_1 - box[1]),
                (x_0 - box[0]):(x_1 - box[0])
            ]

            # Get RLE encoding used by the COCO evaluation API
            segms.append(
                mask_util.encode(
                    np.array(im_mask[:, :, np.newaxis], order='F')
                )[0]
            )
            mask_ind += 1

        cls_segms[j] = segms

    # Every mask must have been consumed exactly once
    assert mask_ind == masks.shape[0]
    return cls_segms


def keypoint_results(cls_boxes, pred_heatmaps, ref_boxes):
    """Converts keypoint heatmaps into per-class lists of keypoint
    predictions; only the person class receives entries.

    When cfg.KRCNN.NMS_OKS is set, predictions are filtered with OKS-based NMS
    and the person entry of `cls_boxes` is filtered IN PLACE to match.

    Returns:
        cls_keyps (list): per-class lists; the person-class entry holds one
            keypoint array per kept detection, all other entries are empty
    """
    person_idx = keypoint_utils.get_person_class_index()
    xy_preds = keypoint_utils.heatmaps_to_keypoints(pred_heatmaps, ref_boxes)

    # NMS OKS
    if cfg.KRCNN.NMS_OKS:
        keep = keypoint_utils.nms_oks(xy_preds, ref_boxes, 0.3)
        xy_preds = xy_preds[keep, :, :]
        ref_boxes = ref_boxes[keep, :]
        pred_heatmaps = pred_heatmaps[keep, :, :, :]
        # Callers observe this filtering through the shared list object
        cls_boxes[person_idx] = cls_boxes[person_idx][keep, :]

    cls_keyps = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    cls_keyps[person_idx] = [
        xy_preds[det] for det in range(xy_preds.shape[0])
    ]
    return cls_keyps


def _get_rois_blob(im_rois, im_scale):
    """Converts RoIs into network inputs.

    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        im_scale: scale factor applied to the RoI coordinates (as returned by
            _get_image_blob)

    Returns:
        blob (ndarray): R x 5 float32 matrix of RoIs in the image pyramid with
            columns [level, x1, y1, x2, y2]
    """
    scaled_rois, level_col = _project_im_rois(im_rois, im_scale)
    # The level column goes first, followed by the four box coordinates
    blob = np.hstack((level_col, scaled_rois))
    return blob.astype(np.float32, copy=False)


def _project_im_rois(im_rois, scales):
    """Project image RoIs into the image pyramid built by _get_image_blob.

    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        scales: scale factor(s) multiplied onto the RoI coordinates (as
            returned by _get_image_blob)

    Returns:
        rois (ndarray): R x 4 float matrix of projected RoI coordinates
        levels (ndarray): R x 1 integer matrix of pyramid levels; always zero
    """
    num_rois = im_rois.shape[0]
    # Every RoI is assigned to pyramid level 0
    level_col = np.zeros((num_rois, 1), dtype=int)
    # Out-of-place multiply: astype(copy=False) may alias the caller's array,
    # which must not be modified
    as_float = im_rois.astype(float, copy=False)
    projected = as_float * scales
    return projected, level_col


def _add_multilevel_rois_for_test(blobs, name):
    """Distributes a set of RoIs across FPN pyramid levels by creating new
    level-specific RoI blobs.

    Arguments:
        blobs (dict): dictionary of blobs
        name (str): a key in 'blobs' identifying the source RoI blob

    Returns:
        [by ref] blobs (dict): level-specific entries derived from `name` are
            added by fpn.add_multilevel_roi_blobs (see _get_rois_blob for the
            row format of the source blob)
    """
    min_level = cfg.FPN.ROI_MIN_LEVEL
    max_level = cfg.FPN.ROI_MAX_LEVEL
    source_rois = blobs[name]
    # Columns 1:5 hold the box coordinates; column 0 is skipped
    target_levels = fpn.map_rois_to_fpn_levels(
        source_rois[:, 1:5], min_level, max_level
    )
    fpn.add_multilevel_roi_blobs(
        blobs, name, source_rois, target_levels, min_level, max_level
    )


def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs.

    Returns:
        (blobs, im_scale): a dict with 'data' and 'im_info' entries (plus
            'rois' when `rois` is not None) and the scale reported by
            blob_utils.get_image_blob
    """
    data, im_scale, im_info = blob_utils.get_image_blob(
        im, target_scale, target_max_size
    )
    blobs = {'data': data, 'im_info': im_info}
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    return blobs, im_scale


================================================
FILE: detectron/core/test_engine.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Test a Detectron network on an imdb (image database)."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import defaultdict
import cv2
import datetime
import logging
import numpy as np
import os

from caffe2.python import workspace

from detectron.core.config import cfg
from detectron.core.config import get_output_dir
from detectron.core.rpn_generator import generate_rpn_on_dataset
from detectron.core.rpn_generator import generate_rpn_on_range
from detectron.core.test import im_detect_all
from detectron.datasets import task_evaluation
from detectron.datasets.json_dataset import JsonDataset
from detectron.modeling import model_builder
from detectron.utils.io import save_object
from detectron.utils.timer import Timer
import detectron.utils.c2 as c2_utils
import detectron.utils.env as envu
import detectron.utils.net as net_utils
import detectron.utils.subprocess as subprocess_utils
import detectron.utils.vis as vis_utils

logger = logging.getLogger(__name__)


def get_eval_functions():
    """Selects the inference entry points for the configured model type.

    Returns:
        (parent_func, child_func): the function that handles a whole dataset
            and the function that handles a range of it
    """
    if cfg.MODEL.RPN_ONLY:
        # RPN-only nets produce proposals instead of detections
        return generate_rpn_on_dataset, generate_rpn_on_range

    # Generic case that handles all network types other than RPN-only nets
    # and RetinaNet
    return test_net_on_dataset, test_net


def get_inference_dataset(index, is_parent=True):
    """Looks up the test dataset at `index` and its proposal file, if any.

    Returns:
        (dataset_name, proposal_file): proposal_file is None unless
            cfg.TEST.PRECOMPUTED_PROPOSALS is set
    """
    assert is_parent or len(cfg.TEST.DATASETS) == 1, \
        'The child inference process can only work on a single dataset'

    dataset_name = cfg.TEST.DATASETS[index]

    if not cfg.TEST.PRECOMPUTED_PROPOSALS:
        return dataset_name, None

    assert is_parent or len(cfg.TEST.PROPOSAL_FILES) == 1, \
        'The child inference process can only work on a single proposal file'
    assert len(cfg.TEST.PROPOSAL_FILES) == len(cfg.TEST.DATASETS), \
        'If proposals are used, one proposal file must be specified for ' \
        'each dataset'
    return dataset_name, cfg.TEST.PROPOSAL_FILES[index]


def run_inference(
    weights_file, ind_range=None,
    multi_gpu_testing=False, gpu_id=0,
    check_expected_results=False,
):
    """Runs inference either as the parent process (ind_range is None) or as
    a subprocess child restricted to a range of one dataset.

    Returns whatever the selected parent/child function returns; in the
    parent case the per-dataset results are merged into a single dict.
    """
    parent_func, child_func = get_eval_functions()

    if ind_range is not None:
        # Subprocess child case:
        # test_net was called via subprocess.Popen to execute on a range of
        # inputs on a single dataset. Expected results are never checked here.
        dataset_name, proposal_file = get_inference_dataset(0, is_parent=False)
        output_dir = get_output_dir(dataset_name, training=False)
        return child_func(
            weights_file,
            dataset_name,
            proposal_file,
            output_dir,
            ind_range=ind_range,
            gpu_id=gpu_id
        )

    # Parent case:
    # Either run inference on the entire dataset in this process or (if
    # multi_gpu_testing is True) use this process to launch subprocesses that
    # each run inference on a range of the dataset
    all_results = {}
    for dataset_idx in range(len(cfg.TEST.DATASETS)):
        dataset_name, proposal_file = get_inference_dataset(dataset_idx)
        output_dir = get_output_dir(dataset_name, training=False)
        all_results.update(
            parent_func(
                weights_file,
                dataset_name,
                proposal_file,
                output_dir,
                multi_gpu=multi_gpu_testing
            )
        )

    if check_expected_results:
        task_evaluation.check_expected_results(
            all_results,
            atol=cfg.EXPECTED_RESULTS_ATOL,
            rtol=cfg.EXPECTED_RESULTS_RTOL
        )
        task_evaluation.log_copy_paste_friendly_results(all_results)

    return all_results


def test_net_on_dataset(
    weights_file,
    dataset_name,
    proposal_file,
    output_dir,
    multi_gpu=False,
    gpu_id=0
):
    """Run inference on a dataset and evaluate the resulting detections.

    Inference is delegated to multi_gpu_test_net_on_dataset() when
    ``multi_gpu`` is true and to test_net() (on ``gpu_id``) otherwise. The
    elapsed inference time is logged, then all detections are handed to
    task_evaluation.evaluate_all() and its result is returned.
    """
    dataset = JsonDataset(dataset_name)
    stopwatch = Timer()
    stopwatch.tic()
    if not multi_gpu:
        detections = test_net(
            weights_file, dataset_name, proposal_file, output_dir, gpu_id=gpu_id
        )
    else:
        # The subprocess launcher needs the image count to split the work.
        num_images = len(dataset.get_roidb())
        detections = multi_gpu_test_net_on_dataset(
            weights_file, dataset_name, proposal_file, num_images, output_dir
        )
    all_boxes, all_segms, all_keyps = detections
    stopwatch.toc()
    logger.info('Total inference time: {:.3f}s'.format(stopwatch.average_time))
    return task_evaluation.evaluate_all(
        dataset, all_boxes, all_segms, all_keyps, output_dir
    )


def multi_gpu_test_net_on_dataset(
    weights_file, dataset_name, proposal_file, num_images, output_dir
):
    """Multi-gpu inference on a dataset.

    Launches the ``test_net`` binary in parallel subprocesses, concatenates
    the per-class results of every subprocess (class 0 is skipped), writes
    the combined detections to ``detections.pkl`` in ``output_dir`` and
    returns ``(all_boxes, all_segms, all_keyps)``.
    """
    binary = os.path.join(
        envu.get_runtime_dir(), 'test_net' + envu.get_py_bin_ext()
    )
    assert os.path.exists(binary), 'Binary \'{}\' not found'.format(binary)

    # The target dataset, weights and (optional) proposal file reach the
    # subprocesses as command-line config overrides.
    opts = [
        'TEST.DATASETS', '("{}",)'.format(dataset_name),
        'TEST.WEIGHTS', weights_file,
    ]
    if proposal_file:
        opts.extend(['TEST.PROPOSAL_FILES', '("{}",)'.format(proposal_file)])

    # One entry per subprocess; each entry is the dictionary saved by
    # test_net().
    outputs = subprocess_utils.process_in_parallel(
        'detection', num_images, binary, output_dir, opts
    )

    # Stitch the per-subprocess results back together, class by class.
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes = [[] for _ in range(num_classes)]
    all_segms = [[] for _ in range(num_classes)]
    all_keyps = [[] for _ in range(num_classes)]
    for det_data in outputs:
        boxes_part = det_data['all_boxes']
        segms_part = det_data['all_segms']
        keyps_part = det_data['all_keyps']
        for cls_idx in range(1, num_classes):
            all_boxes[cls_idx].extend(boxes_part[cls_idx])
            all_segms[cls_idx].extend(segms_part[cls_idx])
            all_keyps[cls_idx].extend(keyps_part[cls_idx])

    det_file = os.path.join(output_dir, 'detections.pkl')
    payload = dict(
        all_boxes=all_boxes,
        all_segms=all_segms,
        all_keyps=all_keyps,
        cfg=envu.yaml_dump(cfg)
    )
    save_object(payload, det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))

    return all_boxes, all_segms, all_keyps


def test_net(
    weights_file,
    dataset_name,
    proposal_file,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.

    Args:
        weights_file: weights used to initialize the model.
        dataset_name: name of the dataset to load the roidb from.
        proposal_file: proposal file forwarded to get_roidb_and_dataset().
        output_dir: directory receiving the detections pickle (and the 'vis'
            sub-directory when cfg.VIS is set).
        ind_range: None to process the whole roidb, otherwise a (start, end)
            pair selecting a slice of it.
        gpu_id: GPU on which the model is created and run.

    Returns:
        (all_boxes, all_segms, all_keyps) indexed as [class][image]; see
        empty_results() for the layout. The same data plus a YAML dump of
        the cfg is also saved to `output_dir`.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range
    )
    model = initialize_model_from_cfg(weights_file, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    # One Timer per stage name, created lazily on first access
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                # Nothing to score: this image keeps its empty result slots
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        # NOTE(review): cv2.imread signals an unreadable file by returning
        # None rather than raising; the result is not checked here -- confirm
        # that all roidb image paths are guaranteed to be readable.
        im = cv2.imread(entry['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                model, im, box_proposals, timers
            )

        # Boxes are always stored; masks/keypoints only when they were produced
        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            # ETA = (sum of the average times of all timers) * images left
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (
                timers['im_detect_bbox'].average_time +
                timers['im_detect_mask'].average_time +
                timers['im_detect_keypoints'].average_time
            )
            misc_time = (
                timers['misc_bbox'].average_time +
                timers['misc_mask'].average_time +
                timers['misc_keypoints'].average_time
            )
            logger.info(
                (
                    'im_detect: range [{:d}, {:d}] of {:d}: '
                    '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'
                ).format(
                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                    start_ind + num_images, det_time, misc_time, eta
                )
            )

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(
                im[:, :, ::-1],  # reverse the channel order of the cv2 image
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis'),
                cls_boxes_i,
                segms=cls_segms_i,
                keypoints=cls_keyps_i,
                thresh=cfg.VIS_TH,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True
            )

    cfg_yaml = envu.yaml_dump(cfg)
    # Range runs get a range-specific file name so that results written by
    # different ranges into the same output_dir do not collide
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(
            all_boxes=all_boxes,
            all_segms=all_segms,
            all_keyps=all_keyps,
            cfg=cfg_yaml
        ), det_file
    )
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps


def initialize_model_from_cfg(weights_file, gpu_id=0):
    """Build a test-time model from the global cfg and load its weights.

    The model is created in inference mode on ``gpu_id``, initialized from
    ``weights_file``, given its inference inputs, and every network it needs
    (main net, conv body, plus the mask / keypoint nets when enabled in the
    cfg) is created in the Caffe2 workspace. Returns the model.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)

    # Collect the nets in the order they have to be created.
    nets_to_create = [model.net, model.conv_body_net]
    if cfg.MODEL.MASK_ON:
        nets_to_create.append(model.mask_net)
    if cfg.MODEL.KEYPOINTS_ON:
        nets_to_create.append(model.keypoint_net)
    for net in nets_to_create:
        workspace.CreateNet(net)
    return model


def get_roidb_and_dataset(dataset_name, proposal_file, ind_range):
    """Load the roidb of ``dataset_name`` and optionally slice it.

    With cfg.TEST.PRECOMPUTED_PROPOSALS set, the roidb is loaded together
    with the proposals from ``proposal_file`` (which must then be given).
    If ``ind_range`` is a (start, end) pair the roidb is restricted to
    ``roidb[start:end]``.

    Returns:
        (roidb, dataset, start, end, total_num_images) where
        ``total_num_images`` is the roidb length before slicing.
    """
    dataset = JsonDataset(dataset_name)
    if not cfg.TEST.PRECOMPUTED_PROPOSALS:
        roidb = dataset.get_roidb()
    else:
        assert proposal_file, 'No proposal file given'
        roidb = dataset.get_roidb(
            proposal_file=proposal_file,
            proposal_limit=cfg.TEST.PROPOSAL_LIMIT
        )

    total_num_images = len(roidb)
    if ind_range is None:
        start, end = 0, total_num_images
    else:
        start, end = ind_range
        roidb = roidb[start:end]

    return roidb, dataset, start, end, total_num_images


def empty_results(num_classes, num_images):
    """Build empty [class][image] result tables for boxes, masks and keypoints.

    The three returned tables are filled in later as follows:
      all_boxes[cls][image] = N x 5 array with columns (x1, y1, x2, y2, score)
      all_segms[cls][image] = [...] list of COCO RLE encoded masks, in 1:1
      correspondence with the boxes in all_boxes[cls][image]
      all_keyps[cls][image] = [...] list of keypoints results, each encoded as
      a 3D array (#rois, 4, #keypoints) with the 4 rows corresponding to
      [x, y, logit, prob] (See: utils.keypoints.heatmaps_to_keypoints).
      Keypoints are recorded for person (cls = 1); they are in 1:1
      correspondence with the boxes in all_boxes[cls][image].

    Returns:
        (all_boxes, all_segms, all_keyps), each a list of ``num_classes``
        lists holding ``num_images`` distinct empty lists.
    """
    def _blank_table():
        # Every cell must be its own list object: a shortcut such as
        # [[]] * N would yield N references to the *same* empty list.
        return [[[] for _ in range(num_images)] for _ in range(num_classes)]

    return _blank_table(), _blank_table(), _blank_table()


def extend_results(index, all_res, im_res):
    """Store one image's per-class results into the full results table.

    For every class except class 0 (__background__), ``im_res[cls]`` is
    written to ``all_res[cls][index]``. ``all_res`` is modified in place.
    """
    for cls_idx, cls_res in enumerate(im_res):
        if cls_idx == 0:
            # __background__ carries no results
            continue
        all_res[cls_idx][index] = cls_res


================================================
FILE: detectron/core/test_retinanet.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Test a RetinaNet network on an image database"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import logging
from collections import defaultdict

from caffe2.python import core, workspace

from detectron.core.config import cfg
from detectron.modeling.generate_anchors import generate_anchors
from detectron.utils.timer import Timer
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils

logger = logging.getLogger(__name__)


def _create_cell_anchors():
    """
    Build the per-cell anchor table of every FPN level.

    For each level between cfg.FPN.RPN_MIN_LEVEL and cfg.FPN.RPN_MAX_LEVEL
    (inclusive) one anchor is generated per (octave scale, aspect ratio)
    combination. Returns a dict mapping the level to an (A, 4) array, where
    A = SCALES_PER_OCTAVE * len(ASPECT_RATIOS).
    """
    min_level = cfg.FPN.RPN_MIN_LEVEL
    max_level = cfg.FPN.RPN_MAX_LEVEL
    num_octave_scales = cfg.RETINANET.SCALES_PER_OCTAVE
    ratios = cfg.RETINANET.ASPECT_RATIOS
    base_scale = cfg.RETINANET.ANCHOR_SCALE
    anchors_per_cell = num_octave_scales * len(ratios)

    anchors_by_level = {}
    for level in range(min_level, max_level + 1):
        stride = 2. ** level
        level_anchors = np.zeros((anchors_per_cell, 4))
        for octave in range(num_octave_scales):
            octave_scale = 2 ** (octave / float(num_octave_scales))
            anchor_size = stride * octave_scale * base_scale
            for ratio_idx, ratio in enumerate(ratios):
                # Rows are ordered octave-major, aspect-ratio-minor
                row = octave * len(ratios) + ratio_idx
                level_anchors[row, :] = generate_anchors(
                    stride=stride,
                    sizes=(anchor_size, ),
                    aspect_ratios=(ratio, ))
        anchors_by_level[level] = level_anchors
    return anchors_by_level


def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image.

    Args:
        model: model whose `net` is run in the Caffe2 workspace.
        im: image array; it is turned into the network input blob and its
            shape is used to clip the predicted boxes.
        timers: optional mapping of name -> Timer; the 'im_detect_bbox' and
            'misc_bbox' entries are updated. A fresh defaultdict(Timer) is
            used when omitted.

    Returns:
        cls_boxes: list of length cfg.MODEL.NUM_CLASSES. Entry 0 stays an
        empty list; entry c >= 1 holds the rows (x0, y0, x1, y1, score) of
        the retained detections of class c.
    """
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    # Number of anchors per spatial cell
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    # Names of the per-level output blobs, ordered from k_min to k_max
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    # From here on cls_probs / box_preds hold the fetched arrays, not names
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    # cnt indexes the fetched per-level outputs (0 corresponds to k_min)
    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # look up the precomputed cell anchors of this level
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        # Split the channel axis into (anchor, class) and (anchor, 4)
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            # Drop the first class channel (presumably background -- confirm)
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        # Keep (at most) the PRE_NMS_TOP_N highest scoring candidates
        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        # Recover (batch, anchor, class, y, x) coordinates of each candidate
        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        # Shift the cell anchors to the candidate's location in the input blob
        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            # NOTE(review): box_pred was reshaped to 5 dimensions above but is
            # indexed with 4 indices here -- confirm the intended layout for
            # class-specific box regression.
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        # Map the boxes back to the scale of the original image
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        # `classes` is 0-based over foreground classes, hence the `cls - 1`
        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        if cfg.TEST.SOFT_NMS.ENABLED:
            cls_dets, keep = box_utils.soft_nms(
                cls_dets,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
        # Append the class id as a 6th column
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    # NOTE(review): np.vstack raises on an empty list, so this relies on at
    # least one level having produced candidates (see the threshold comment
    # above) -- confirm that this always holds.
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes


================================================
FILE: detectron/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m
================================================
function VOCopts = get_voc_opts(path)
% GET_VOC_OPTS  Return the VOCopts structure of the VOC devkit at PATH.
%   Temporarily changes into PATH and puts its 'VOCcode' directory on the
%   MATLAB path in order to run VOCinit. The working directory and the
%   MATLAB path are restored before returning (and before raising an error).
%   NOTE(review): VOCopts is never assigned explicitly in this function;
%   presumably the VOCinit script defines it in this workspace -- confirm.

% Remember the current directory so that it can be restored afterwards
tmp = pwd;
cd(path);
try
  addpath('VOCcode');
  VOCinit;
catch
  % Undo the path / directory changes before reporting the failure
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
rmpath('VOCcode');
cd(tmp);


================================================
FILE: detectron/datasets/VOCdevkit-matlab-wrapper/voc_eval.m
================================================
function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate detections for every class of a PASCAL VOC devkit.
%   PATH is the devkit location handed to get_voc_opts, COMP_ID and
%   OUTPUT_DIR are forwarded to voc_eval_cls, and TEST_SET is stored in
%   VOCopts.testset. RES is a struct array with one voc_eval_cls result per
%   class. The per-class APs and their mean are printed as percentages.

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

for i = 1:length(VOCopts.classes)
  cls = VOCopts.classes{i};
  res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
end

% Summary: one AP per line followed by the mean AP
fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
aps = [res(:).ap]';
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');

function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% VOC_EVAL_CLS  Evaluate the detections of a single class.
%   Returns a struct with fields recall, prec, ap and ap_auc. When the
%   evaluation is skipped (see do_eval below), recall and prec are empty and
%   both AP values are 0. The result is also saved to
%   [OUTPUT_DIR '/' CLS '_pr.mat']; when the evaluation runs, the current
%   figure is additionally printed to [OUTPUT_DIR '/' CLS '_pr.jpg'].

test_set = VOCopts.testset;
% Characters 4..end of the dataset name are parsed as the year below
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE(review): res_fn is not used anywhere else in this function
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

% Defaults used when the evaluation is skipped
recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% Evaluate unless this is the 'test' set of a year after 2007
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  print(gcf, '-djpeg', '-r0', ...
        [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

save([output_dir '/' cls '_pr.mat'], ...
     'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));


================================================
FILE: detectron/datasets/VOCdevkit-matlab-wrapper/xVOCap.m
================================================
function ap = xVOCap(rec,prec)
% XVOCAP  Average precision as the area under the precision/recall curve.
%   From the PASCAL VOC 2011 devkit. REC and PREC are column vectors.

% Pad the curve with sentinel points at both ends
padded_rec = [0 ; rec ; 1];
padded_prec = [0 ; prec ; 0];

% Sweep from right to left so that precision never increases with recall
for k = (numel(padded_prec) - 1):-1:1
    padded_prec(k) = max(padded_prec(k), padded_prec(k + 1));
end

% Sum the rectangles at the positions where recall changes
steps = find(padded_rec(2:end) ~= padded_rec(1:end-1)) + 1;
widths = padded_rec(steps) - padded_rec(steps - 1);
ap = sum(widths .* padded_prec(steps));


================================================
FILE: detectron/datasets/__init__.py
================================================


================================================
FILE: detectron/datasets/cityscapes_json_dataset_evaluator.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Functions for evaluating results on Cityscapes."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import logging
import os
import uuid

import pycocotools.mask as mask_util

from detectron.core.config import cfg
from detectron.datasets.dataset_catalog import get_raw_dir

logger = logging.getLogger(__name__)


def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Write instance masks in the Cityscapes result format and evaluate them.

    For every image in the dataset's roidb a `<basename>pred.txt` file is
    written to `output_dir`, listing one `<png path> <label id> <score>` line
    per predicted instance; the binary masks themselves are written as PNG
    files below `output_dir`/results. Finally the Cityscapes instance-level
    evaluation script is run.

    Args:
        json_dataset: dataset providing `name`, `classes` and `get_roidb()`.
        all_boxes: detections indexed as [class][image]; the last column of
            each array is used as the score.
        all_segms: masks indexed as [class][image], decoded with pycocotools.
        output_dir: directory for the result files ('/tmp' is used instead
            when cfg.CLUSTER.ON_CLUSTER is set).
        use_salt: only affects `res_file` (see the note below).
        cleanup: not used in this function.

    Returns:
        None.
    """
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'
    # NOTE(review): res_file is assembled here but never used afterwards in
    # this function.
    res_file = os.path.join(
        output_dir, 'segmentations_' + json_dataset.name + '_results')
    if use_salt:
        res_file += '_{}'.format(str(uuid.uuid4()))
    res_file += '.json'

    results_dir = os.path.join(output_dir, 'results')
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    os.environ['CITYSCAPES_DATASET'] = get_raw_dir(json_dataset.name)
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            # Class index 0 is skipped
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                # No masks for this class in this image
                if segms == []:
                    continue
                masks = mask_util.decode(segms)

                # The k-th box and the k-th mask belong to the same instance
                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    # The path written to the txt file is relative
                    # ('results/...'); the PNG itself goes below output_dir
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None


================================================
FILE: detectron/datasets/coco_to_cityscapes_id.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# mapping coco categories to cityscapes (our converted json) id
# cityscapes
# INFO roidb.py: 220: 1       bicycle: 7286
# INFO roidb.py: 220: 2           car: 53684
# INFO roidb.py: 220: 3        person: 35704
# INFO roidb.py: 220: 4         train: 336
# INFO roidb.py: 220: 5         truck: 964
# INFO roidb.py: 220: 6    motorcycle: 1468
# INFO roidb.py: 220: 7           bus: 758
# INFO roidb.py: 220: 8         rider: 3504

# coco (val5k)
# INFO roidb.py: 220: 1        person: 21296
# INFO roidb.py: 220: 2       bicycle: 628
# INFO roidb.py: 220: 3           car: 3818
# INFO roidb.py: 220: 4    motorcycle: 732
# INFO roidb.py: 220: 5      airplane: 286 <------ irrelevant
# INFO roidb.py: 220: 6           bus: 564
# INFO roidb.py: 220: 7         train: 380
# INFO roidb.py: 220: 8         truck: 828


def cityscapes_to_coco(cityscapes_id):
    """Return the COCO category id for a Cityscapes category id.

    'rider' has no COCO counterpart and maps to -1 (-1 means rand init).
    Ids outside the table raise KeyError.
    """
    # COCO ids listed in Cityscapes id order:
    #   background, bicycle, car, person, train, truck, motorcycle, bus, rider
    coco_ids = (0, 2, 3, 1, 7, 8, 4, 6, -1)
    lookup = dict(enumerate(coco_ids))
    return lookup[cityscapes_id]


def cityscapes_to_coco_with_rider(cityscapes_id):
    """Return the COCO category id for a Cityscapes category id.

    Same table as cityscapes_to_coco, except that 'rider' is mapped to the
    COCO 'person' category (1). Ids outside the table raise KeyError.
    """
    # COCO ids listed in Cityscapes id order:
    #   background, bicycle, car, person, train, truck, motorcycle, bus, rider
    coco_ids = (0, 2, 3, 1, 7, 8, 4, 6, 1)
    lookup = dict(enumerate(coco_ids))
    return lookup[cityscapes_id]


def cityscapes_to_coco_without_person_rider(cityscapes_id):
    """Return the COCO category id for a Cityscapes category id.

    'person' and 'rider' are ignored, i.e. mapped to -1. Ids outside the
    table raise KeyError.
    """
    # COCO ids listed in Cityscapes id order:
    #   background, bicycle, car, person, train, truck, motorcycle, bus, rider
    coco_ids = (0, 2, 3, -1, 7, 8, 4, 6, -1)
    lookup = dict(enumerate(coco_ids))
    return lookup[cityscapes_id]


def cityscapes_to_coco_all_random(cityscapes_id):
    """Return -1 for every known Cityscapes category id (0 through 8).

    No category is mapped to a COCO category. Ids outside the table raise
    KeyError.
    """
    # Ids 0..8: background, bicycle, car, person, train, truck, motorcycle,
    # bus, rider
    lookup = dict.fromkeys(range(9), -1)
    return lookup[cityscapes_id]


================================================
FILE: detectron/datasets/data/README.md
================================================
# Setting Up Datasets

This directory contains symlinks to data locations.

## Creating Symlinks for COCO

Symlink the COCO dataset:

```
ln -s /path/to/coco $DETECTRON/detectron/datasets/data/coco
```

We assume that your local COCO dataset copy at `/path/to/coco` has the following directory structure:

```
coco
|_ coco_train2014
|  |_ <im-1-name>.jpg
|  |_ ...
|  |_ <im-N-name>.jpg
|_ coco_val2014
|_ ...
|_ annotations
   |_ instances_train2014.json
   |_ ...
```

If that is not the case, you may need to do something similar to:

```
mkdir -p $DETECTRON/detectron/datasets/data/coco
ln -s /path/to/coco_train2014 $DETECTRON/detectron/datasets/data/coco/coco_train2014
ln -s /path/to/coco_val2014 $DETECTRON/detectron/datasets/data/coco/coco_val2014
ln -s /path/to/json/annotations $DETECTRON/detectron/datasets/data/coco/annotations
```

### COCO Minival Annotations

Our custom `minival` and `valminusminival` annotations are available for download [here](https://dl.fbaipublicfiles.com/detectron/coco/coco_annotations_minival.tgz).
Please note that `minival` is exactly equivalent to the recently defined 2017 `val` set.
Similarly, the union of `valminusminival` and the 2014 `train` is exactly equivalent to the 2017 `train` set. To complete installation of the COCO dataset, you will need to copy the `minival` and `valminusminival` json annotation files to the `coco/annotations` directory referenced above.

## Creating Symlinks for PASCAL VOC

We assume that your symlinked `detectron/datasets/data/VOC<year>` directory has the following structure:

```
VOC<year>
|_ JPEGImages
|  |_ <im-1-name>.jpg
|  |_ ...
|  |_ <im-N-name>.jpg
|_ annotations
|  |_ voc_<year>_train.json
|  |_ voc_<year>_val.json
|  |_ ...
|_ VOCdevkit<year>
```

Create symlinks for `VOC<year>`:

```
mkdir -p $DETECTRON/detectron/datasets/data/VOC<year>
ln -s /path/to/VOC<year>/JPEGImages $DETECTRON/detectron/datasets/data/VOC<year>/JPEGImages
ln -s /path/to/VOC<year>/json/annotations $DETECTRON/detectron/datasets/data/VOC<year>/annotations
ln -s /path/to/VOC<year>/devkit $DETECTRON/detectron/datasets/data/VOC<year>/VOCdevkit<year>
```

### PASCAL VOC Annotations in COCO Format

We expect PASCAL VOC annotations converted to COCO json format, which are available for download [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip).

## Creating Symlinks for Cityscapes

We assume that your symlinked `detectron/datasets/data/cityscapes` directory has the following structure:

```
cityscapes
|_ images
|  |_ <im-1-name>.jpg
|  |_ ...
|  |_ <im-N-name>.jpg
|_ annotations
|  |_ instancesonly_gtFine_train.json
|  |_ ...
|_ raw
   |_ gtFine
   |_ ...
   |_ README.md
```

Create symlinks for `cityscapes`:

```
mkdir -p $DETECTRON/detectron/datasets/data/cityscapes
ln -s /path/to/cityscapes/images $DETECTRON/detectron/datasets/data/cityscapes/images
ln -s /path/to/cityscapes/json/annotations $DETECTRON/detectron/datasets/data/cityscapes/annotations
ln -s /path/to/cityscapes/root $DETECTRON/detectron/datasets/data/cityscapes/raw
```

### Cityscapes Annotations in COCO Format

We expect Cityscapes annotations converted to COCO json format, which we will make available for download soon.


================================================
FILE: detectron/datasets/dataset_catalog.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Collection of available datasets."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os


# Path to data dir: the 'data' directory that sits next to this module file.
# Every path in _DATASETS below is built by string-concatenating onto it.
_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')

# Required dataset entry keys (present in every _DATASETS entry below)
_IM_DIR = 'image_directory'
_ANN_FN = 'annotation_file'

# Optional dataset entry keys
# _IM_PREFIX: only set for the coco_2017_test* entries below.
# _DEVKIT_DIR: only set for the voc_* entries below.
# _RAW_DIR: only set for the cityscapes_* entries below.
_IM_PREFIX = 'image_prefix'
_DEVKIT_DIR = 'devkit_directory'
_RAW_DIR = 'raw_dir'

# Available datasets
# Maps dataset name -> dict of the entry keys defined above. All values are
# plain path strings (or, for _IM_PREFIX, a file-name prefix string).
_DATASETS = {
    'cityscapes_fine_instanceonly_seg_train': {
        _IM_DIR:
            _DATA_DIR + '/cityscapes/images',
        _ANN_FN:
            _DATA_DIR + '/cityscapes/annotations/instancesonly_gtFine_train.json',
        _RAW_DIR:
            _DATA_DIR + '/cityscapes/raw'
    },
    'cityscapes_fine_instanceonly_seg_val': {
        _IM_DIR:
            _DATA_DIR + '/cityscapes/images',
        # use filtered validation as there is an issue converting contours
        _ANN_FN:
            _DATA_DIR + '/cityscapes/annotations/instancesonly_filtered_gtFine_val.json',
        _RAW_DIR:
            _DATA_DIR + '/cityscapes/raw'
    },
    'cityscapes_fine_instanceonly_seg_test': {
        _IM_DIR:
            _DATA_DIR + '/cityscapes/images',
        _ANN_FN:
            _DATA_DIR + '/cityscapes/annotations/instancesonly_gtFine_test.json',
        _RAW_DIR:
            _DATA_DIR + '/cityscapes/raw'
    },
    'coco_2014_train': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_train2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/instances_train2014.json'
    },
    'coco_2014_val': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/instances_val2014.json'
    },
    'coco_2014_minival': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/instances_minival2014.json'
    },
    'coco_2014_valminusminival': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/instances_valminusminival2014.json'
    },
    'coco_2015_test': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test2015.json'
    },
    'coco_2015_test-dev': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test-dev2015.json'
    },
    'coco_2017_test': {  # 2017 test uses 2015 test images
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test2017.json',
        _IM_PREFIX:
            'COCO_test2015_'
    },
    'coco_2017_test-dev': {  # 2017 test-dev uses 2015 test images
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test-dev2017.json',
        _IM_PREFIX:
            'COCO_test2015_'
    },
    'coco_stuff_train': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_train2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/coco_stuff_train.json'
    },
    'coco_stuff_val': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/coco_stuff_val.json'
    },
    'keypoints_coco_2014_train': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_train2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/person_keypoints_train2014.json'
    },
    'keypoints_coco_2014_val': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/person_keypoints_val2014.json'
    },
    'keypoints_coco_2014_minival': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/person_keypoints_minival2014.json'
    },
    'keypoints_coco_2014_valminusminival': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/person_keypoints_valminusminival2014.json'
    },
    # The keypoint test entries point at the same image-info files as the
    # plain coco_2015_test / coco_2015_test-dev entries above.
    'keypoints_coco_2015_test': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test2015.json'
    },
    'keypoints_coco_2015_test-dev': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test-dev2015.json'
    },
    'voc_2007_train': {
        _IM_DIR:
            _DATA_DIR + '/VOC2007/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2007/annotations/voc_2007_train.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2007/VOCdevkit2007'
    },
    'voc_2007_val': {
        _IM_DIR:
            _DATA_DIR + '/VOC2007/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2007/annotations/voc_2007_val.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2007/VOCdevkit2007'
    },
    'voc_2007_test': {
        _IM_DIR:
            _DATA_DIR + '/VOC2007/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2007/annotations/voc_2007_test.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2007/VOCdevkit2007'
    },
    'voc_2012_train': {
        _IM_DIR:
            _DATA_DIR + '/VOC2012/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2012/annotations/voc_2012_train.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2012/VOCdevkit2012'
    },
    'voc_2012_val': {
        _IM_DIR:
            _DATA_DIR + '/VOC2012/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2012/annotations/voc_2012_val.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2012/VOCdevkit2012'
    }
}


def datasets():
    """Return the names of all datasets registered in the catalog."""
    catalog_names = _DATASETS.keys()
    return catalog_names


def contains(name):
    """Tell whether `name` is one of the datasets registered in the catalog."""
    known_names = _DATASETS.keys()
    return name in known_names


def get_im_dir(name):
    """Look up the image directory registered for dataset `name`.

    Raises KeyError when `name` is not in the catalog.
    """
    dataset_entry = _DATASETS[name]
    return dataset_entry[_IM_DIR]


def get_ann_fn(name):
    """Look up the annotation file registered for dataset `name`.

    Raises KeyError when `name` is not in the catalog.
    """
    dataset_entry = _DATASETS[name]
    return dataset_entry[_ANN_FN]


def get_im_prefix(name):
    """Look up the image file-name prefix registered for dataset `name`.

    The prefix is an optional catalog key; datasets that do not define one
    yield the empty string. Raises KeyError when `name` is not in the catalog.
    """
    dataset_entry = _DATASETS[name]
    if _IM_PREFIX in dataset_entry:
        return dataset_entry[_IM_PREFIX]
    return ''


def get_devkit_dir(name):
    """Look up the devkit directory registered for dataset `name`.

    Raises KeyError when `name` is not in the catalog or when its entry does
    not define a devkit directory.
    """
    dataset_entry = _DATASETS[name]
    return dataset_entry[_DEVKIT_DIR]


def get_raw_dir(name):
    """Look up the raw data directory registered for dataset `name`.

    Raises KeyError when `name` is not in the catalog or when its entry does
    not define a raw directory.
    """
    dataset_entry = _DATASETS[name]
    return dataset_entry[_RAW_DIR]


================================================
FILE: detectron/datasets/dummy_datasets.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Provide stub objects that can act as stand-in "dummy" datasets for simple use
cases, like getting all classes in a dataset. This exists so that demos can be
run without requiring users to download/install datasets first.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.utils.collections import AttrDict


def get_coco_dataset():
    """Build a stand-in COCO dataset that carries only a 'classes' field.

    `classes` maps each contiguous class index to its name, with index 0
    being '__background__'.
    """
    class_names = [
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    dummy = AttrDict()
    dummy.classes = dict(enumerate(class_names))
    return dummy


================================================
FILE: detectron/datasets/json_dataset.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Representation of the standard COCO json dataset format.

When working with a new dataset, we strongly suggest to convert the dataset into
the COCO json format and use the existing code; it is not recommended to write
code to support new dataset formats.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import logging
import numpy as np
import os
import scipy.sparse

# Must happen before importing COCO API (which imports matplotlib)
import detectron.utils.env as envu
envu.set_up_matplotlib()
# COCO API
from pycocotools import mask as COCOmask
from pycocotools.coco import COCO

from detectron.core.config import cfg
from detectron.utils.timer import Timer
import detectron.datasets.dataset_catalog as dataset_catalog
import detectron.utils.boxes as box_utils
from detectron.utils.io import load_object
import detectron.utils.segms as segm_utils

logger = logging.getLogger(__name__)


class JsonDataset:
    """A class representing a COCO json dataset.

    The constructor resolves the image directory, image prefix and annotation
    file through the dataset catalog, loads the annotations with the COCO API
    and derives the class list plus the mappings between json category ids
    and contiguous class indices (index 0 is '__background__').
    """

    def __init__(self, name):
        """Load the dataset registered under `name` in the dataset catalog.

        Raises:
            AssertionError: if `name` is not in the catalog, or if its image
                directory or annotation file does not exist.
        """
        assert dataset_catalog.contains(name), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(dataset_catalog.get_im_dir(name)), \
            'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name))
        assert os.path.exists(dataset_catalog.get_ann_fn(name)), \
            'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name))
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.image_directory = dataset_catalog.get_im_dir(name)
        self.image_prefix = dataset_catalog.get_im_prefix(name)
        self.COCO = COCO(dataset_catalog.get_ann_fn(name))
        self.debug_timer = Timer()
        # Set up dataset classes
        category_ids = self.COCO.getCatIds()
        categories = [c['name'] for c in self.COCO.loadCats(category_ids)]
        self.category_to_id_map = dict(zip(categories, category_ids))
        self.classes = ['__background__'] + categories
        self.num_classes = len(self.classes)
        # Contiguous ids start at 1 because index 0 of self.classes is the
        # background class. Reuse the ids fetched above rather than querying
        # the COCO API a second time.
        self.json_category_id_to_contiguous_id = {
            v: i + 1
            for i, v in enumerate(category_ids)
        }
        self.contiguous_category_id_to_json_id = {
            v: k
            for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self._init_keypoints()

    def get_roidb(
        self,
        gt=False,
        proposal_file=None,
        min_proposal_size=2,
        proposal_limit=-1,
        crowd_filter_thresh=0
    ):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions

        Args:
            gt: when True, ground-truth annotations are added to each entry.
            proposal_file: path of a proposals file to load, or None.
            min_proposal_size: forwarded to the small-box filter applied to
                loaded proposals.
            proposal_limit: when > 0, keep only the first `proposal_limit`
                proposals per image (after filtering).
            crowd_filter_thresh: when > 0, proposals overlapping crowd
                regions above this threshold are marked with overlap -1.

        Returns:
            A list of roidb entries (dicts), one per image, in ascending
            image-id order.

        Raises:
            AssertionError: if `crowd_filter_thresh` is non-zero while `gt`
                is not True.
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        image_ids = self.COCO.getImgIds()
        image_ids.sort()
        # Deep copy so that the entries can be mutated without touching the
        # image records held by the COCO API object.
        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
        for entry in roidb:
            self._prep_roidb_entry(entry)
        if gt:
            # Include ground-truth object annotations
            self.debug_timer.tic()
            for entry in roidb:
                self._add_gt_annotations(entry)
            logger.debug(
                '_add_gt_annotations took {:.3f}s'.
                format(self.debug_timer.toc(average=False))
            )
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(
                roidb, proposal_file, min_proposal_size, proposal_limit,
                crowd_filter_thresh
            )
            logger.debug(
                '_add_proposals_from_file took {:.3f}s'.
                format(self.debug_timer.toc(average=False))
            )
        _add_class_assignments(roidb)
        return roidb

    def _prep_roidb_entry(self, entry):
        """Adds empty metadata fields to an roidb entry.

        `entry` is modified in place: it gains a back-reference to this
        dataset, the resolved image path and empty placeholder arrays, and
        loses the json-only fields that are not needed afterwards.

        Raises:
            AssertionError: if the resolved image path does not exist.
        """
        # Reference back to the parent dataset
        entry['dataset'] = self
        # Make file_name an abs path
        im_path = os.path.join(
            self.image_directory, self.image_prefix + entry['file_name']
        )
        assert os.path.exists(im_path), 'Image \'{}\' not found'.format(im_path)
        entry['image'] = im_path
        entry['flipped'] = False
        entry['has_visible_keypoints'] = False
        # Empty placeholders
        entry['boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['segms'] = []
        entry['gt_classes'] = np.empty((0), dtype=np.int32)
        entry['seg_areas'] = np.empty((0), dtype=np.float32)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_classes), dtype=np.float32)
        )
        entry['is_crowd'] = np.empty((0), dtype=bool)
        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the index
        # in the list of rois that satisfy np.where(entry['gt_classes'] > 0)
        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.empty(
                (0, 3, self.num_keypoints), dtype=np.int32
            )
        # Remove unwanted fields that come from the json file (if they exist)
        for k in ['date_captured', 'url', 'license', 'file_name']:
            if k in entry:
                del entry[k]

    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry.

        Annotations are dropped when their area is below
        cfg.TRAIN.GT_MIN_AREA, when they are flagged 'ignore', or when the
        clipped box is degenerate. The remaining ones are appended to the
        arrays already stored in `entry`.
        """
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded
            if segm_utils.is_poly(obj['segmentation']):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert form (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )
        if self.keypoints is not None:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                obj_keypoints = self._get_gt_keypoints(obj)
                # _get_gt_keypoints returns None for annotations that carry
                # no 'keypoints' field. Assigning None into the integer array
                # would raise a TypeError, so such rows keep their all-zero
                # initialisation (visibility 0 == not labeled).
                if obj_keypoints is not None:
                    gt_keypoints[ix, :, :] = obj_keypoints
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(int).astype(float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        # The sparse matrix is densified for the append and converted back.
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = im_has_visible_keypoints

    def _add_proposals_from_file(
        self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh
    ):
        """Add proposals from a proposals file to an roidb.

        The loaded proposals are restricted to the images present in `roidb`
        and sorted by image id, so that proposal i lines up with roidb entry
        i. Per image the boxes are clipped to the image, de-duplicated,
        filtered by `min_proposal_size` and truncated to the first `top_k`
        (when `top_k` > 0) before being merged into the roidb.
        """
        logger.info('Loading proposals from: {}'.format(proposal_file))
        proposals = load_object(proposal_file)

        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix

        _remove_proposals_not_in_roidb(proposals, roidb, id_field)
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(
                boxes, entry['height'], entry['width']
            )
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        """Initialize COCO keypoint information.

        The keypoint attributes stay at their "no keypoints" defaults unless
        the dataset has a 'person' category whose category record contains a
        'keypoints' list.
        """
        self.keypoints = None
        self.keypoint_flip_map = None
        self.keypoints_to_id_map = None
        self.num_keypoints = 0
        # Thus far only the 'person' category has keypoints
        if 'person' in self.category_to_id_map:
            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])
        else:
            return

        # Check if the annotations contain keypoint data or not
        if 'keypoints' in cat_info[0]:
            keypoints = cat_info[0]['keypoints']
            self.keypoints_to_id_map = dict(
                zip(keypoints, range(len(keypoints))))
            self.keypoints = keypoints
            self.num_keypoints = len(keypoints)
            self.keypoint_flip_map = {
                'left_eye': 'right_eye',
                'left_ear': 'right_ear',
                'left_shoulder': 'right_shoulder',
                'left_elbow': 'right_elbow',
                'left_wrist': 'right_wrist',
                'left_hip': 'right_hip',
                'left_knee': 'right_knee',
                'left_ankle': 'right_ankle'}

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints.

        Returns:
            None when `obj` has no 'keypoints' field; otherwise an int32
            array of shape (3, self.num_keypoints) whose rows hold the x
            coordinates, the y coordinates and the visibility flags.

        Raises:
            AssertionError: if the flat keypoint list does not contain
                exactly 3 * self.num_keypoints values.
        """
        if 'keypoints' not in obj:
            return None
        # The json stores keypoints as a flat [x0, y0, v0, x1, y1, v1, ...]
        # list.
        kp = np.array(obj['keypoints'])
        # True division on purpose: a length that is not a multiple of 3
        # yields a non-integer count and fails the check below.
        num_keypoints = len(obj['keypoints']) / 3
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        gt_kps[0, :] = kp[0::3]  # 0-indexed x coordinates
        gt_kps[1, :] = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        gt_kps[2, :] = kp[2::3]
        return gt_kps


def add_proposals(roidb, rois, scales, crowd_thresh):
    """Attach proposal boxes (rois) to an roidb that already holds
    ground-truth annotations but no proposals.

    Column 0 of `rois` selects the roidb entry a row belongs to; the
    remaining columns are the box. Each image's boxes are divided by the
    matching entry of `scales` before being merged. Crowd filtering is
    applied when `crowd_thresh` > 0, and class assignments are recomputed at
    the end.
    """
    per_image_boxes = []
    for im_index, _ in enumerate(roidb):
        to_original_scale = 1. / scales[im_index]
        row_inds = np.flatnonzero(rois[:, 0] == im_index)
        per_image_boxes.append(rois[row_inds, 1:] * to_original_scale)
    _merge_proposal_boxes_into_roidb(roidb, per_image_boxes)
    if crowd_thresh > 0:
        _filter_crowd_proposals(roidb, crowd_thresh)
    _add_class_assignments(roidb)


def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Append the proposal boxes in `box_list[i]` to roidb entry i.

    For every proposal the maximum overlap with the entry's ground-truth
    boxes is recorded under the class of the best-matching ground-truth box,
    and `box_to_gt_ind_map` stores the index of that box (-1 when there is no
    positive overlap). All per-box arrays of the entry are extended in place.
    """
    assert len(box_list) == len(roidb)
    for entry_index, entry in enumerate(roidb):
        proposals = box_list[entry_index]
        num_proposals = proposals.shape[0]
        overlaps_dtype = entry['gt_overlaps'].dtype
        new_overlaps = np.zeros(
            (num_proposals, entry['gt_overlaps'].shape[1]),
            dtype=overlaps_dtype
        )
        new_gt_ind_map = -np.ones(
            (num_proposals), dtype=entry['box_to_gt_ind_map'].dtype
        )

        # Crowd ground truth is deliberately kept here; proposals that
        # overlap crowd regions are dealt with afterwards by
        # _filter_crowd_proposals.
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            overlaps = box_utils.bbox_overlaps(
                proposals.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False)
            )
            # Best-matching gt box per proposal and the size of that overlap
            best_gt = overlaps.argmax(axis=1)
            best_overlap = overlaps.max(axis=1)
            # Only proposals that actually touch a gt box get recorded
            matched = np.where(best_overlap > 0)[0]
            new_overlaps[matched, gt_classes[best_gt[matched]]] = \
                best_overlap[matched]
            new_gt_ind_map[matched] = gt_inds[best_gt[matched]]

        entry['boxes'] = np.append(
            entry['boxes'],
            proposals.astype(entry['boxes'].dtype, copy=False),
            axis=0
        )
        # Proposals carry class 0, zero segmentation area and no crowd flag.
        for key in ('gt_classes', 'seg_areas'):
            entry[key] = np.append(
                entry[key], np.zeros((num_proposals), dtype=entry[key].dtype)
            )
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), new_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_proposals), dtype=entry['is_crowd'].dtype)
        )
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            new_gt_ind_map.astype(
                entry['box_to_gt_ind_map'].dtype, copy=False
            )
        )


def _filter_crowd_proposals(roidb, crowd_thresh):
    """Mark proposals lying inside crowd regions so training skips them.

    For each entry, every non-ground-truth box whose maximum IoU (computed
    with the COCO crowd convention) against the entry's crowd boxes exceeds
    `crowd_thresh` gets overlap -1 for all classes. Entries without crowd
    boxes or without non-gt boxes are left untouched.
    """
    for entry in roidb:
        dense_overlaps = entry['gt_overlaps'].toarray()
        crowd_inds = np.flatnonzero(entry['is_crowd'] == 1)
        proposal_inds = np.flatnonzero(entry['gt_classes'] == 0)
        if crowd_inds.size == 0 or proposal_inds.size == 0:
            continue
        entry_boxes = entry['boxes']
        crowd_xywh = box_utils.xyxy_to_xywh(entry_boxes[crowd_inds, :])
        proposal_xywh = box_utils.xyxy_to_xywh(entry_boxes[proposal_inds, :])
        # Every crowd box is flagged as crowd for the IoU computation
        crowd_flags = [1] * len(crowd_inds)
        ious = COCOmask.iou(proposal_xywh, crowd_xywh, crowd_flags)
        inside_crowd = np.where(ious.max(axis=1) > crowd_thresh)[0]
        dense_overlaps[proposal_inds[inside_crowd], :] = -1
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)


def _add_class_assignments(roidb):
    """Store, for every box of every roidb entry, the class with the largest
    ground-truth overlap ('max_classes') and that overlap ('max_overlaps').

    Raises AssertionError when a box with zero max overlap is not assigned
    to class 0, or a box with positive max overlap is assigned to class 0.
    """
    for entry in roidb:
        dense_overlaps = entry['gt_overlaps'].toarray()
        # Reduce over the class axis (columns)
        best_overlap = dense_overlaps.max(axis=1)
        best_class = dense_overlaps.argmax(axis=1)
        entry['max_classes'] = best_class
        entry['max_overlaps'] = best_overlap
        # Sanity check: zero overlap must mean background (class 0) ...
        is_background = best_overlap == 0
        assert (best_class[is_background] == 0).all()
        # ... and positive overlap must mean a foreground class.
        is_foreground = best_overlap > 0
        assert (best_class[is_foreground] != 0).all()


def _sort_proposals(proposals, id_field):
    """Reorder the 'boxes', `id_field` and 'scores' lists of `proposals` in
    place so that the values under `id_field` are ascending.
    """
    # The permutation is computed once, before any field is rewritten.
    permutation = np.argsort(proposals[id_field])
    for key in ('boxes', id_field, 'scores'):
        unsorted = proposals[key]
        proposals[key] = [unsorted[src] for src in permutation]


def _remove_proposals_not_in_roidb(proposals, roidb, id_field):
    """Drop, in place, every proposal whose image id does not occur in
    `roidb`. The 'boxes', `id_field` and 'scores' lists are filtered
    together so they stay aligned.
    """
    roidb_ids = {entry["id"] for entry in roidb}
    kept_positions = [
        pos for pos, image_id in enumerate(proposals[id_field])
        if image_id in roidb_ids
    ]
    for field in ('boxes', id_field, 'scores'):
        column = proposals[field]
        proposals[field] = [column[pos] for pos in kept_positions]


================================================
FILE: detectron/datasets/json_dataset_evaluator.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Functions for evaluating results computed for a json dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import json
import logging
import numpy as np
import os
import six
import uuid

from pycocotools.cocoeval import COCOeval

from detectron.core.config import cfg
from detectron.utils.io import save_object
import detectron.utils.boxes as box_utils

logger = logging.getLogger(__name__)


def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Write the segmentation results json for `json_dataset` into
    `output_dir` and, for non-test datasets, run the COCO segmentation
    evaluation on it.

    With `use_salt` a random uuid is added to the results file name; with
    `cleanup` the results file is deleted before returning. Returns the
    evaluation object, or None when the dataset name contains 'test'.
    """
    file_stem = 'segmentations_' + json_dataset.name + '_results'
    if use_salt:
        file_stem += '_{}'.format(str(uuid.uuid4()))
    res_file = os.path.join(output_dir, file_stem + '.json')
    _write_coco_segms_results_file(
        json_dataset, all_boxes, all_segms, res_file)
    # Test-set annotations are undisclosed, so there is nothing to score
    # against for datasets whose name mentions 'test'.
    if 'test' in json_dataset.name:
        logger.warning(
            '{} eval ignored as annotations are undisclosed on test: {} ignored'
            .format("Segmentation", json_dataset.name)
        )
        coco_eval = None
    else:
        coco_eval = _do_segmentation_eval(json_dataset, res_file, output_dir)
    # The results json is only kept when the caller did not ask for cleanup
    if cleanup:
        os.remove(res_file)
    return coco_eval


def _write_coco_segms_results_file(
    json_dataset, all_boxes, all_segms, res_file
):
    """Collect the per-category segmentation results and dump them as a
    COCO-style results json to `res_file`.

    Classes are visited in `json_dataset.classes` order, skipping
    '__background__' and stopping at the first class index that has no entry
    in `all_boxes`.

    Note: on Python 3 the 'counts' field of each RLE dict is decoded in
    place, so the dicts referenced by `all_segms` are modified.
    """
    # [{"image_id": 42,
    #   "category_id": 18,
    #   "segmentation": [...],
    #   "score": 0.236}, ...]
    results = []
    for cls_ind, cls in enumerate(json_dataset.classes):
        if cls == '__background__':
            continue
        if cls_ind >= len(all_boxes):
            break
        cat_id = json_dataset.category_to_id_map[cls]
        results.extend(_coco_segms_results_one_category(
            json_dataset, all_boxes[cls_ind], all_segms[cls_ind], cat_id))
    logger.info(
        'Writing segmentation results json to: {}'.format(
            os.path.abspath(res_file)))
    # "counts" is an array encoded by mask_util as a byte-stream. Python 3's
    # json writer always produces strings and cannot serialize bytes, so the
    # byte-stream is decoded first (utf-8, matching pycocotools/_mask.pyx).
    # Only bytes values are decoded: because the RLE dicts are mutated in
    # place, a second pass over the same segmentations would otherwise fail
    # with "'str' object has no attribute 'decode'".
    # The decoding happens before the file is opened so that a failure here
    # does not leave an empty results file behind.
    if six.PY3:
        for r in results:
            rle = r['segmentation']
            if 'counts' in rle and isinstance(rle['counts'], bytes):
                rle['counts'] = rle['counts'].decode("utf8")
    with open(res_file, 'w') as fid:
        json.dump(results, fid)


def _coco_segms_results_one_category(json_dataset, boxes, segms, cat_id):
    """Build COCO-style segmentation records for a single category.

    boxes[i] and segms[i] hold the detections for the i-th image id in sorted
    order. An empty list in boxes[i] means the image has no detections and is
    skipped. The last column of each detection row is used as its score.
    """
    image_ids = sorted(json_dataset.COCO.getImgIds())
    assert len(boxes) == len(image_ids)
    assert len(segms) == len(image_ids)

    records = []
    for idx, image_id in enumerate(image_ids):
        image_dets = boxes[idx]
        image_rles = segms[idx]

        no_detections = isinstance(image_dets, list) and len(image_dets) == 0
        if no_detections:
            continue

        image_dets = image_dets.astype(float)
        scores = image_dets[:, -1]
        for k in range(image_dets.shape[0]):
            records.append({
                'image_id': image_id,
                'category_id': cat_id,
                'segmentation': image_rles[k],
                'score': scores[k],
            })

    return records


def _do_segmentation_eval(json_dataset, res_file, output_dir):
    """Score a segmentation results json with COCOeval ('segm').

    Loads res_file against json_dataset.COCO, runs evaluate/accumulate, logs
    the per-category metrics, pickles the COCOeval object to
    <output_dir>/segmentation_results.pkl and returns it.
    """
    detections = json_dataset.COCO.loadRes(str(res_file))
    evaluator = COCOeval(json_dataset.COCO, detections, 'segm')
    evaluator.evaluate()
    evaluator.accumulate()
    _log_detection_eval_metrics(json_dataset, evaluator)

    pickle_path = os.path.join(output_dir, 'segmentation_results.pkl')
    save_object(evaluator, pickle_path)
    logger.info('Wrote json eval results to: {}'.format(pickle_path))
    return evaluator


def evaluate_boxes(
    json_dataset, all_boxes, output_dir, use_salt=True, cleanup=False
):
    """Write box detections to a COCO results json and evaluate them.

    The results file is <output_dir>/bbox_<dataset name>_results[_<uuid>].json;
    the uuid suffix is added when use_salt is true. Evaluation is skipped (and
    None returned) for datasets whose name contains 'test'. When cleanup is
    true the results json is deleted before returning.
    """
    salt = '_{}'.format(str(uuid.uuid4())) if use_salt else ''
    res_file = os.path.join(
        output_dir, 'bbox_' + json_dataset.name + '_results'
    ) + salt + '.json'

    _write_coco_bbox_results_file(json_dataset, all_boxes, res_file)

    if 'test' in json_dataset.name:
        # Annotations are undisclosed on test sets, so there is nothing to
        # evaluate against.
        logger.warning(
            '{} eval ignored as annotations are undisclosed on test: {} ignored'
            .format("Bbox", json_dataset.name)
        )
        coco_eval = None
    else:
        coco_eval = _do_detection_eval(json_dataset, res_file, output_dir)

    if cleanup:
        # Optionally remove the results json file
        os.remove(res_file)
    return coco_eval


def _write_coco_bbox_results_file(json_dataset, all_boxes, res_file):
    """Gather box detections for every class and dump them as json.

    The output file holds one flat list of records shaped like:
        [{"image_id": 42,
          "category_id": 18,
          "bbox": [258.15,41.29,348.26,243.78],
          "score": 0.236}, ...]

    '__background__' is skipped, and collection stops at the first class
    index for which all_boxes has no entry.
    """
    num_result_classes = len(all_boxes)
    detections = []
    for class_index, class_name in enumerate(json_dataset.classes):
        if class_name == '__background__':
            continue
        if class_index >= num_result_classes:
            break
        category_id = json_dataset.category_to_id_map[class_name]
        detections += _coco_bbox_results_one_category(
            json_dataset, all_boxes[class_index], category_id)

    target_path = os.path.abspath(res_file)
    logger.info('Writing bbox results json to: {}'.format(target_path))
    with open(res_file, 'w') as fid:
        json.dump(detections, fid)


def _coco_bbox_results_one_category(json_dataset, boxes, cat_id):
    """Build COCO-style box records for a single category.

    boxes[i] holds the detections for the i-th image id in sorted order. An
    empty list means the image has no detections and is skipped. Columns 0-3
    of each row are converted with box_utils.xyxy_to_xywh and the last column
    is used as the score.
    """
    image_ids = sorted(json_dataset.COCO.getImgIds())
    assert len(boxes) == len(image_ids)

    records = []
    for idx, image_id in enumerate(image_ids):
        image_dets = boxes[idx]
        if isinstance(image_dets, list) and len(image_dets) == 0:
            continue
        image_dets = image_dets.astype(float)
        scores = image_dets[:, -1]
        xywh = box_utils.xyxy_to_xywh(image_dets[:, 0:4])
        for k in range(image_dets.shape[0]):
            records.append({
                'image_id': image_id,
                'category_id': cat_id,
                'bbox': [xywh[k, 0], xywh[k, 1], xywh[k, 2], xywh[k, 3]],
                'score': scores[k],
            })
    return records


def _do_detection_eval(json_dataset, res_file, output_dir):
    """Score a box results json with COCOeval ('bbox').

    Loads res_file against json_dataset.COCO, runs evaluate/accumulate, logs
    the per-category metrics, pickles the COCOeval object to
    <output_dir>/detection_results.pkl and returns it.
    """
    detections = json_dataset.COCO.loadRes(str(res_file))
    evaluator = COCOeval(json_dataset.COCO, detections, 'bbox')
    evaluator.evaluate()
    evaluator.accumulate()
    _log_detection_eval_metrics(json_dataset, evaluator)

    pickle_path = os.path.join(output_dir, 'detection_results.pkl')
    save_object(evaluator, pickle_path)
    logger.info('Wrote json eval results to: {}'.format(pickle_path))
    return evaluator


def _log_detection_eval_metrics(json_dataset, coco_eval):
    """Log mean and per-category AP over IoU=[0.50, 0.95], then the summary.

    AP values are the mean of all precision entries greater than -1 in the
    selected slice of coco_eval.eval['precision'], reported as percentages.
    """
    def _get_thr_ind(coco_eval, thr):
        # Locate the first IoU threshold within 1e-5 of `thr`
        iou_thrs = coco_eval.params.iouThrs
        in_window = (iou_thrs > thr - 1e-5) & (iou_thrs < thr + 1e-5)
        ind = np.where(in_window)[0][0]
        assert np.isclose(iou_thrs[ind], thr)
        return ind

    IoU_lo_thresh = 0.5
    IoU_hi_thresh = 0.95
    ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
    ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
    iou_span = slice(ind_lo, ind_hi + 1)

    # precision has dims (iou, recall, cls, area range, max dets)
    # area range index 0: all area ranges
    # max dets index 2: 100 per image
    overall = coco_eval.eval['precision'][iou_span, :, :, 0, 2]
    ap_default = np.mean(overall[overall > -1])
    header = '~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ~~~~'.format(
        IoU_lo_thresh, IoU_hi_thresh)
    logger.info(header)
    logger.info('{:.1f}'.format(100 * ap_default))

    for cls_ind, cls in enumerate(json_dataset.classes):
        if cls == '__background__':
            continue
        # The precision array has no __background__ slot, hence the -1 shift
        per_class = coco_eval.eval['precision'][
            iou_span, :, cls_ind - 1, 0, 2]
        class_ap = np.mean(per_class[per_class > -1])
        logger.info('{:.1f}'.format(100 * class_ap))

    logger.info('~~~~ Summary metrics ~~~~')
    coco_eval.summarize()


def evaluate_box_proposals(
    json_dataset, roidb, thresholds=None, area='all', limit=None, class_specific=False
):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.

    Args:
        json_dataset: not used by this function; kept for interface
            compatibility.
        roidb: iterable of entries providing 'gt_classes', 'is_crowd', 'boxes'
            and 'seg_areas' arrays. Rows with gt_classes > 0 and is_crowd == 0
            are treated as ground truth; rows with gt_classes == 0 are treated
            as proposals.
        thresholds: IoU thresholds at which recall is computed. Defaults to
            0.5 through 0.95 in steps of 0.05.
        area: name of the ground-truth area range to evaluate ('all', 'small',
            'medium', 'large', '96-128', '128-256', '256-512', '512-inf').
        limit: if not None, only the first `limit` proposals per image are
            used.
        class_specific: if True, recall is computed per ground-truth class and
            'ar' is the mean of the per-class average recalls.

    Returns:
        dict with keys 'ar', 'recalls', 'thresholds', 'gt_overlaps' and
        'num_pos'. 'gt_overlaps' holds one value per counted ground-truth box
        (0 for boxes that no proposal was matched to); it is sorted only when
        class_specific is False.
    """
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7}
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]]  # 512-inf
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    # gt_overlaps and gt_classes are kept aligned: entry k of each describes
    # the same ground-truth box.
    gt_overlaps = np.zeros(0)
    gt_classes = np.zeros(0)
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where(
            (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        # valid_gt_inds indexes the gt_inds-filtered arrays, so the class
        # labels must be filtered by gt_inds first.
        _gt_classes = entry['gt_classes'][gt_inds][valid_gt_inds]
        assert gt_boxes.shape[0] == _gt_classes.shape[0]
        gt_classes = np.hstack((gt_classes, _gt_classes))
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        # Every counted gt box gets an overlap slot, even when the image has
        # no proposals; this keeps gt_overlaps aligned with gt_classes.
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        if boxes.shape[0] > 0:
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]
            overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert gt_ovr >= 0
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert _gt_overlaps[j] == gt_ovr
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)

    if not class_specific:
        gt_overlaps = np.sort(gt_overlaps)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()
    else:
        gt_classes_unique = np.unique(gt_classes)
        recalls = np.zeros((gt_classes_unique.shape[0], len(thresholds)))
        # compute recall for each category and each iou threshold
        for i, category_id in enumerate(gt_classes_unique):
            inds = (gt_classes == category_id)
            num_pos_per_category = float(inds.sum())
            for j, thresh in enumerate(thresholds):
                recalls[i][j] = (
                    gt_overlaps[inds] >= thresh
                ).sum() / num_pos_per_category
        ar = recalls.mean(axis=1).mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps, 'num_pos': num_pos}

def evaluate_keypoints(
    json_dataset,
    all_boxes,
    all_keypoints,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Write keypoint detections to a COCO results json and evaluate them.

    The results file is
    <output_dir>/keypoints_<dataset name>_results[_<uuid>].json; the uuid
    suffix is added when use_salt is true. Evaluation is skipped (and None
    returned) for datasets whose name contains 'test'. When cleanup is true
    the results json is deleted before returning.
    """
    salt = '_{}'.format(str(uuid.uuid4())) if use_salt else ''
    res_file = os.path.join(
        output_dir, 'keypoints_' + json_dataset.name + '_results'
    ) + salt + '.json'

    _write_coco_keypoint_results_file(
        json_dataset, all_boxes, all_keypoints, res_file)

    if 'test' in json_dataset.name:
        # Annotations are undisclosed on test sets, so there is nothing to
        # evaluate against.
        logger.warning(
            '{} eval ignored as annotations are undisclosed on test: {} ignored'
            .format("Keypoints", json_dataset.name)
        )
        coco_eval = None
    else:
        coco_eval = _do_keypoint_eval(json_dataset, res_file, output_dir)

    if cleanup:
        # Optionally remove the results json file
        os.remove(res_file)
    return coco_eval


def _write_coco_keypoint_results_file(
    json_dataset, all_boxes, all_keypoints, res_file
):
    """Gather keypoint detections for every class and dump them as json.

    '__background__' is skipped, and collection stops at the first class
    index for which all_keypoints has no entry. Progress is logged per class.
    """
    num_result_classes = len(all_keypoints)
    detections = []
    for class_index, class_name in enumerate(json_dataset.classes):
        if class_name == '__background__':
            continue
        if class_index >= num_result_classes:
            break
        progress = 'Collecting {} results ({:d}/{:d})'.format(
            class_name, class_index, len(all_keypoints) - 1)
        logger.info(progress)
        category_id = json_dataset.category_to_id_map[class_name]
        detections += _coco_kp_results_one_category(
            json_dataset,
            all_boxes[class_index],
            all_keypoints[class_index],
            category_id)

    target_path = os.path.abspath(res_file)
    logger.info('Writing keypoint results json to: {}'.format(target_path))
    with open(res_file, 'w') as fid:
        json.dump(detections, fid)


def _coco_kp_results_one_category(json_dataset, boxes, kps, cat_id):
    """Build COCO-style keypoint records for a single category.

    boxes[i] and kps[i] hold the detections for the i-th image id in sorted
    order. Each record's 'keypoints' list is a flat (x, y, 1) triple per
    keypoint. The record score depends on cfg.KRCNN.KEYPOINT_CONFIDENCE:
    'bbox' uses the last column of the matching box row, while 'logit' and
    'prob' average row 2 or row 3 of the keypoint array respectively.

    Raises:
        ValueError: if cfg.KRCNN.KEYPOINT_CONFIDENCE is not one of the three
            supported values.
    """
    image_ids = sorted(json_dataset.COCO.getImgIds())
    assert len(kps) == len(image_ids)
    assert len(boxes) == len(image_ids)

    confidence = cfg.KRCNN.KEYPOINT_CONFIDENCE
    use_box_score = confidence == 'bbox'
    if not use_box_score:
        # This is ugly; see utils.keypoints.heatmap_to_keypoints for the magic
        # indexes
        if confidence == 'logit':
            score_index = 2
        elif confidence == 'prob':
            score_index = 3
        else:
            raise ValueError(
                'KRCNN.KEYPOINT_CONFIDENCE must be "logit", "prob", or "bbox"')

    records = []
    for idx, image_id in enumerate(image_ids):
        if len(boxes[idx]) == 0:
            continue
        image_kps = kps[idx]
        box_scores = boxes[idx][:, -1].astype(float)
        if len(image_kps) == 0:
            continue
        for j in range(len(image_kps)):
            instance = image_kps[j]
            num_keypoints = instance.shape[1]

            flat_xyv = []
            score_total = 0
            for k in range(num_keypoints):
                flat_xyv.extend(
                    [float(instance[0, k]), float(instance[1, k]), 1])
                if not use_box_score:
                    score_total += instance[score_index, k]

            if use_box_score:
                record_score = box_scores[j]
            else:
                record_score = score_total / num_keypoints

            records.append({'image_id': image_id,
                            'category_id': cat_id,
                            'keypoints': flat_xyv,
                            'score': record_score})
    return records


def _do_keypoint_eval(json_dataset, res_file, output_dir):
    """Score a keypoint results json with COCOeval ('keypoints').

    Evaluation is restricted to the dataset's sorted image ids. The COCOeval
    object is pickled to <output_dir>/keypoint_results.pkl, its summary is
    printed via summarize(), and the object is returned.
    """
    sorted_image_ids = sorted(json_dataset.COCO.getImgIds())
    detections = json_dataset.COCO.loadRes(res_file)
    evaluator = COCOeval(json_dataset.COCO, detections, 'keypoints')
    evaluator.params.imgIds = sorted_image_ids
    evaluator.evaluate()
    evaluator.accumulate()

    pickle_path = os.path.join(output_dir, 'keypoint_results.pkl')
    save_object(evaluator, pickle_path)
    logger.info('Wrote json eval results to: {}'.format(pickle_path))
    evaluator.summarize()
    return evaluator


================================================
FILE: detectron/datasets/roidb.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Functions for common roidb manipulations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from past.builtins import basestring
import logging
import numpy as np

from detectron.core.config import cfg
from detectron.datasets.json_dataset import JsonDataset
import detectron.utils.boxes as box_utils
import detectron.utils.keypoints as keypoint_utils
import detectron.utils.segms as segm_utils

logger = logging.getLogger(__name__)


def combined_roidb_for_training(dataset_names, proposal_files):
    """Load, merge and prepare the roidbs of one or more training datasets.

    dataset_names and proposal_files may each be a single string or a
    sequence; an empty proposal_files means "no proposals for any dataset".
    Each dataset's roidb is loaded (optionally extended with flipped entries
    when cfg.TRAIN.USE_FLIPPED is set), the roidbs are concatenated, filtered
    with filter_for_training, and bounding-box regression targets are added.
    """
    def load_one(dataset_name, proposal_file):
        dataset = JsonDataset(dataset_name)
        entries = dataset.get_roidb(
            gt=True,
            proposal_file=proposal_file,
            crowd_filter_thresh=cfg.TRAIN.CROWD_FILTER_THRESH
        )
        if cfg.TRAIN.USE_FLIPPED:
            logger.info('Appending horizontally-flipped training examples...')
            extend_with_flipped_entries(entries, dataset)
        logger.info('Loaded dataset: {:s}'.format(dataset.name))
        return entries

    # Normalize bare strings into 1-tuples so both arguments can be zipped
    if isinstance(dataset_names, basestring):
        dataset_names = (dataset_names, )
    if isinstance(proposal_files, basestring):
        proposal_files = (proposal_files, )
    if len(proposal_files) == 0:
        proposal_files = (None, ) * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)

    per_dataset = [
        load_one(name, proposals)
        for name, proposals in zip(dataset_names, proposal_files)
    ]
    # The first dataset's list is grown in place with the remaining ones
    roidb = per_dataset[0]
    for extra in per_dataset[1:]:
        roidb.extend(extra)
    roidb = filter_for_training(roidb)

    logger.info('Computing bounding-box regression targets...')
    add_bbox_regression_targets(roidb)
    logger.info('done')

    _compute_and_log_stats(roidb)

    return roidb


def extend_with_flipped_entries(roidb, dataset):
    """Append a horizontally flipped copy of every entry to roidb, in place.

    The given list is extended with the flipped entries; nothing is returned.
    A flipped entry shares every value of the original except 'boxes',
    'segms', 'gt_keypoints' (only when dataset.keypoints is not None) and
    'flipped', which are recomputed for the mirrored image.
    """
    recomputed_keys = ('boxes', 'segms', 'gt_keypoints', 'flipped')
    mirrored_entries = []
    for entry in roidb:
        image_width = entry['width']

        # Mirror x-coordinates: the new x1 comes from the old x2 and vice versa
        mirrored_boxes = entry['boxes'].copy()
        x1_before = mirrored_boxes[:, 0].copy()
        x2_before = mirrored_boxes[:, 2].copy()
        mirrored_boxes[:, 0] = image_width - x2_before - 1
        mirrored_boxes[:, 2] = image_width - x1_before - 1
        assert (mirrored_boxes[:, 2] >= mirrored_boxes[:, 0]).all()

        mirrored = {
            key: value
            for key, value in entry.items()
            if key not in recomputed_keys
        }
        mirrored['boxes'] = mirrored_boxes
        mirrored['segms'] = segm_utils.flip_segms(
            entry['segms'], entry['height'], entry['width']
        )
        if dataset.keypoints is not None:
            mirrored['gt_keypoints'] = keypoint_utils.flip_keypoints(
                dataset.keypoints, dataset.keypoint_flip_map,
                entry['gt_keypoints'], entry['width']
            )
        mirrored['flipped'] = True
        mirrored_entries.append(mirrored)
    roidb.extend(mirrored_entries)


def filter_for_training(roidb):
    """Return the roidb entries that have usable RoIs under the current cfg.

    An entry is kept when it has at least one foreground RoI
    (max_overlaps >= cfg.TRAIN.FG_THRESH) or at least one background RoI
    (max_overlaps in [BG_THRESH_LO, BG_THRESH_HI)). When
    cfg.MODEL.KEYPOINTS_ON is set, the entry must also have visible keypoints.
    """
    def is_valid(entry):
        overlaps = entry['max_overlaps']
        # Foreground: boxes with sufficient overlap
        is_fg = overlaps >= cfg.TRAIN.FG_THRESH
        # Background: boxes within [BG_THRESH_LO, BG_THRESH_HI)
        is_bg = ((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                 (overlaps >= cfg.TRAIN.BG_THRESH_LO))
        num_fg = len(np.where(is_fg)[0])
        num_bg = len(np.where(is_bg)[0])
        valid = num_fg > 0 or num_bg > 0
        if cfg.MODEL.KEYPOINTS_ON:
            # If we're training for keypoints, exclude images with no keypoints
            valid = valid and entry['has_visible_keypoints']
        return valid

    num_before = len(roidb)
    kept = []
    for entry in roidb:
        if is_valid(entry):
            kept.append(entry)
    num_kept = len(kept)
    logger.info('Filtered {} roidb entries: {} -> {}'.
                format(num_before - num_kept, num_before, num_kept))
    return kept


def add_bbox_regression_targets(roidb):
    """Store a 'bbox_targets' array on every roidb entry (in place)."""
    for roidb_entry in roidb:
        regression_targets = compute_bbox_regression_targets(roidb_entry)
        roidb_entry['bbox_targets'] = regression_targets


def compute_bbox_regression_targets(entry):
    """Compute bounding-box regression targets for one roidb entry.

    Returns a float32 array with one row per RoI in entry['boxes'], laid out
    as (class, tx, ty, tw, th). Rows stay all-zero for RoIs whose max overlap
    is below cfg.TRAIN.BBOX_THRESH, and for every RoI when the entry has no
    non-crowd ground-truth boxes.
    """
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    # Indices of ground-truth ROIs
    is_gt = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
    gt_inds = np.where(is_gt)[0]
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Nothing to regress towards without ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    ex_rois = rois[ex_inds, :]

    # IoU overlap between each example ROI and each gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        ex_rois.astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Each example ROI is assigned the gt ROI it overlaps the most
    best_gt = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[best_gt], :]

    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        # Use class "1" for all boxes if using class_agnostic_bbox_reg
        targets[ex_inds, 0] = 1
    else:
        targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets


def _compute_and_log_stats(roidb):
    """Log (at debug level) a per-class histogram of non-crowd gt objects.

    Class names are taken from the 'dataset' object of the first roidb entry.
    """
    classes = roidb[0]['dataset'].classes
    num_classes = len(classes)
    char_len = np.max([len(c) for c in classes])
    hist_bins = np.arange(num_classes + 1)

    # Accumulate the ground-truth object count of every class
    gt_hist = np.zeros((num_classes), dtype=int)
    for entry in roidb:
        labels = entry['gt_classes']
        keep = np.where((labels > 0) & (entry['is_crowd'] == 0))[0]
        counts, _ = np.histogram(labels[keep], bins=hist_bins)
        gt_hist += counts

    logger.debug('Ground-truth class histogram:')
    for class_index, count in enumerate(gt_hist):
        line = '{:d}{:s}: {:d}'.format(
            class_index, classes[class_index].rjust(char_len), count)
        logger.debug(line)
    logger.debug('-' * char_len)
    total_line = '{:s}: {:d}'.format('total'.rjust(char_len), np.sum(gt_hist))
    logger.debug(total_line)


================================================
FILE: detectron/datasets/task_evaluation.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Evaluation interface for supported tasks (box detection, instance
segmentation, keypoint detection, ...).


Results are stored in an OrderedDict with the following nested structure:

<dataset>:
  <task>:
    <metric>: <val>

<dataset> is any valid dataset (e.g., 'coco_2014_minival')
<task> is in ['box', 'mask', 'keypoint', 'box_proposal']
<metric> can be ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR@1000',
                 'ARs@1000', 'ARm@1000', 'ARl@1000', ...]
<val> is a floating point number
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import OrderedDict
import logging
import os
import pprint

from detectron.core.config import cfg
from detectron.utils.logging import send_email
import detectron.datasets.cityscapes_json_dataset_evaluator \
    as cs_json_dataset_evaluator
import detectron.datasets.json_dataset_evaluator as json_dataset_evaluator
import detectron.datasets.voc_dataset_evaluator as voc_dataset_evaluator

logger = logging.getLogger(__name__)


def evaluate_all(
    dataset, all_boxes, all_segms, all_keyps, output_dir, use_matlab=False
):
    """Run every enabled evaluation task and merge the results.

    Box detection is always evaluated. Instance segmentation and keypoint
    results are merged into the same per-dataset dict when cfg.MODEL.MASK_ON
    and cfg.MODEL.KEYPOINTS_ON are set, respectively.
    """
    merged = evaluate_boxes(
        dataset, all_boxes, output_dir, use_matlab=use_matlab
    )
    logger.info('Evaluating bounding boxes is done!')

    if cfg.MODEL.MASK_ON:
        mask_results = evaluate_masks(
            dataset, all_boxes, all_segms, output_dir)
        merged[dataset.name].update(mask_results[dataset.name])
        logger.info('Evaluating segmentations is done!')

    if cfg.MODEL.KEYPOINTS_ON:
        keypoint_results = evaluate_keypoints(
            dataset, all_boxes, all_keyps, output_dir)
        merged[dataset.name].update(keypoint_results[dataset.name])
        logger.info('Evaluating keypoints is done!')

    return merged


def evaluate_boxes(dataset, all_boxes, output_dir, use_matlab=False):
    """Evaluate bounding box detection.

    Dispatches on the dataset name: COCO-style datasets (or any dataset when
    cfg.TEST.FORCE_JSON_DATASET_EVAL is set) and Cityscapes use the json
    dataset evaluator; PASCAL VOC uses the VOC evaluator, which is the only
    one that receives use_matlab.

    Returns:
        OrderedDict mapping dataset.name to the box results.

    Raises:
        NotImplementedError: if no evaluator matches the dataset.
    """
    logger.info('Evaluating detections')
    # Outside competition mode, results files are salted and cleaned up
    not_comp = not cfg.TEST.COMPETITION_MODE
    if _use_json_dataset_evaluator(dataset):
        coco_eval = json_dataset_evaluator.evaluate_boxes(
            dataset, all_boxes, output_dir, use_salt=not_comp, cleanup=not_comp
        )
        box_results = _coco_eval_to_box_results(coco_eval)
    elif _use_cityscapes_evaluator(dataset):
        # logger.warn is a deprecated alias; logger.warning is the supported
        # spelling.
        logger.warning(
            'Cityscapes bbox evaluated using COCO metrics/conversions')
        coco_eval = json_dataset_evaluator.evaluate_boxes(
            dataset, all_boxes, output_dir, use_salt=not_comp, cleanup=not_comp
        )
        box_results = _coco_eval_to_box_results(coco_eval)
    elif _use_voc_evaluator(dataset):
        # For VOC, always use salt and always cleanup because results are
        # written to the shared VOCdevkit results directory
        voc_eval = voc_dataset_evaluator.evaluate_boxes(
            dataset, all_boxes, output_dir, use_matlab=use_matlab
        )
        box_results = _voc_eval_to_box_results(voc_eval)
    else:
        raise NotImplementedError(
            'No evaluator for dataset: {}'.format(dataset.name)
        )
    return OrderedDict([(dataset.name, box_results)])


def evaluate_masks(dataset, all_boxes, all_segms, output_dir):
    """Evaluate instance segmentation.

    COCO-style datasets (or any dataset when cfg.TEST.FORCE_JSON_DATASET_EVAL
    is set) use the json dataset evaluator; Cityscapes uses its own evaluator.
    Returns an OrderedDict mapping dataset.name to the mask results and raises
    NotImplementedError for any other dataset.
    """
    logger.info('Evaluating segmentations')
    not_comp = not cfg.TEST.COMPETITION_MODE

    if _use_json_dataset_evaluator(dataset):
        coco_eval = json_dataset_evaluator.evaluate_masks(
            dataset, all_boxes, all_segms, output_dir,
            use_salt=not_comp, cleanup=not_comp
        )
        mask_results = _coco_eval_to_mask_results(coco_eval)
    elif _use_cityscapes_evaluator(dataset):
        cs_eval = cs_json_dataset_evaluator.evaluate_masks(
            dataset, all_boxes, all_segms, output_dir,
            use_salt=not_comp, cleanup=not_comp
        )
        mask_results = _cs_eval_to_mask_results(cs_eval)
    else:
        raise NotImplementedError(
            'No evaluator for dataset: {}'.format(dataset.name)
        )

    per_dataset = [(dataset.name, mask_results)]
    return OrderedDict(per_dataset)


def evaluate_keypoints(dataset, all_boxes, all_keyps, output_dir):
    """Evaluate human keypoint detection (i.e., 2D pose estimation).

    Only datasets whose name starts with 'keypoints_coco_' are accepted.
    Returns an OrderedDict mapping dataset.name to the keypoint results.
    """
    logger.info('Evaluating detections')
    not_comp = not cfg.TEST.COMPETITION_MODE
    is_coco_keypoints = dataset.name.startswith('keypoints_coco_')
    assert is_coco_keypoints, 'Only COCO keypoints are currently supported'

    coco_eval = json_dataset_evaluator.evaluate_keypoints(
        dataset, all_boxes, all_keyps, output_dir,
        use_salt=not_comp, cleanup=not_comp
    )
    per_dataset = [(dataset.name, _coco_eval_to_keypoint_results(coco_eval))]
    return OrderedDict(per_dataset)


def evaluate_box_proposals(dataset, roidb):
    """Evaluate bounding box object proposals.

    Average recall is computed for proposal limits 100 and 1000 and for the
    'all', 'small', 'medium' and 'large' area ranges, and stored under keys
    of the form 'AR<suffix>@<limit>' (e.g. 'ARs@1000').
    """
    res = _empty_box_proposal_results()
    areas = {'all': '', 'small': 's', 'medium': 'm', 'large': 'l'}
    proposal_metrics = res['box_proposal']
    for max_proposals in [100, 1000]:
        for area_name, area_suffix in areas.items():
            stats = json_dataset_evaluator.evaluate_box_proposals(
                dataset,
                roidb,
                area=area_name,
                limit=max_proposals,
                class_specific=cfg.TEST.CLASS_SPECIFIC_AR
            )
            metric_name = 'AR{}@{:d}'.format(area_suffix, max_proposals)
            proposal_metrics[metric_name] = stats['ar']
    return OrderedDict([(dataset.name, res)])


def log_box_proposal_results(results):
    """Log the 'box_proposal' metrics of every dataset in results.

    Metric names are left-justified to the longest name so values line up.
    """
    for dataset, tasks in results.items():
        proposal_metrics = tasks['box_proposal']
        pad = max([len(name) for name in proposal_metrics.keys()])
        logger.info(dataset)
        for name, value in proposal_metrics.items():
            logger.info('{}: {:.3f}'.format(name.ljust(pad), value))


def log_copy_paste_friendly_results(results):
    """Log results as comma-separated lines that paste cleanly into a
    spreadsheet. Every line starts with 'copypaste: ' so it is easy to grep.
    """
    for dataset, tasks in results.items():
        logger.info('copypaste: Dataset: {}'.format(dataset))
        for task, metrics in tasks.items():
            logger.info('copypaste: Task: {}'.format(task))
            names_line = metrics.keys()
            values_line = ['{:.4f}'.format(v) for v in metrics.values()]
            logger.info('copypaste: ' + ','.join(names_line))
            logger.info('copypaste: ' + ','.join(values_line))


def check_expected_results(results, atol=0.005, rtol=0.1):
    """Check actual results against expected results stored in
    cfg.EXPECTED_RESULTS. Optionally email if the mismatch exceeds the
    specified tolerance.

    Expected results should take the form of a list of expectations, each
    specified by four elements: [dataset, task, metric, expected value]. For
    example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387], ...].

    The expected value may also be formatted as a list [mean, std] providing
    an empirical mean and standard deviation from which a valid range is computed
    using cfg.EXPECTED_RESULTS_SIGMA_TOL. For example:
    [['coco_2014_minival', 'box_proposal', 'AR@1000', [0.387, 0.001]], ...]

    Args:
        results: nested mapping <dataset> -> <task> -> <metric> -> value.
        atol: absolute tolerance used for scalar expected values.
        rtol: relative tolerance used for scalar expected values; the allowed
            error is atol + rtol * abs(expected value).

    Every expectation is logged as PASS (info) or FAIL (error). On failure an
    email is sent when cfg.EXPECTED_RESULTS_EMAIL is non-empty.
    """
    # cfg contains a reference set of results that we want to check against
    if len(cfg.EXPECTED_RESULTS) == 0:
        return

    for dataset, task, metric, expected_val in cfg.EXPECTED_RESULTS:
        assert dataset in results, 'Dataset {} not in results'.format(dataset)
        assert task in results[dataset], 'Task {} not in results'.format(task)
        assert metric in results[dataset][task], \
            'Metric {} not in results'.format(metric)
        actual_val = results[dataset][task][metric]
        ok = False
        if isinstance(expected_val, list):
            assert len(expected_val) == 2, (
                'Expected result must be in (mean, std) format'
            )
            mean, std = expected_val
            lo = mean - cfg.EXPECTED_RESULTS_SIGMA_TOL * std
            hi = mean + cfg.EXPECTED_RESULTS_SIGMA_TOL * std
            ok = (lo < actual_val) and (actual_val < hi)
            msg = (
                '{} > {} > {} sanity check (actual vs. expected): '
                '{:.3f} vs. mean={:.4f}, std={:.4}, range=({:.4f}, {:.4f})'
            ).format(dataset, task, metric, actual_val, mean, std, lo, hi)
        else:
            err = abs(actual_val - expected_val)
            tol = atol + rtol * abs(expected_val)
            # The check passes when the error stays within the tolerance
            ok = (err < tol)
            msg = (
                '{} > {} > {} sanity check (actual vs. expected): '
                '{:.3f} vs. {:.3f}, err={:.3f}, tol={:.3f}'
            ).format(dataset, task, metric, actual_val, expected_val, err, tol)
        if not ok:
            msg = 'FAIL: ' + msg
            logger.error(msg)
            if cfg.EXPECTED_RESULTS_EMAIL != '':
                subject = 'Detectron end-to-end test failure'
                job_name = os.environ.get('DETECTRON_JOB_NAME', '<unknown>')
                job_id = os.environ.get('WORKFLOW_RUN_ID', '<unknown>')
                body = [
                    'Name:',
                    job_name,
                    'Run ID:',
                    job_id,
                    'Failure:',
                    msg,
                    'Config:',
                    pprint.pformat(cfg),
                    'Env:',
                    pprint.pformat(dict(os.environ)),
                ]
                send_email(
                    subject, '\n\n'.join(body), cfg.EXPECTED_RESULTS_EMAIL
                )
        else:
            msg = 'PASS: ' + msg
            logger.info(msg)


def _use_json_dataset_evaluator(dataset):
    """Return whether `dataset` should go through the json dataset evaluator.

    True when the dataset name contains 'coco_'; otherwise the value of
    cfg.TEST.FORCE_JSON_DATASET_EVAL decides.
    """
    is_coco_like = 'coco_' in dataset.name
    return is_coco_like or cfg.TEST.FORCE_JSON_DATASET_EVAL


def _use_cityscapes_evaluator(dataset):
    """Return True when the dataset name contains 'cityscapes_'."""
    return 'cityscapes_' in dataset.name


def _use_voc_evaluator(dataset):
    """Return True when the dataset name starts with the 'voc_' prefix."""
    return dataset.name.startswith('voc_')


# Positions of the box and mask metrics inside a COCO eval `stats` array
(
    COCO_AP,
    COCO_AP50,
    COCO_AP75,
    COCO_APS,
    COCO_APM,
    COCO_APL,
) = range(6)
# Keypoint results in this module carry no APs entry; their APm / APl values
# are read from positions 3 and 4 instead.
COCO_KPS_APM = 3
COCO_KPS_APL = 4


# ---------------------------------------------------------------------------- #
# Helper functions for producing properly formatted results.
# ---------------------------------------------------------------------------- #

def _coco_eval_to_box_results(coco_eval):
    """Convert a COCO eval object into the box results structure.

    When `coco_eval` is None the placeholder results from
    `_empty_box_results()` are returned unchanged; otherwise each metric is
    filled from the matching index of `coco_eval.stats`.
    """
    results = _empty_box_results()
    if coco_eval is None:
        return results
    stats = coco_eval.stats
    box = results['box']
    metric_to_index = (
        ('AP', COCO_AP),
        ('AP50', COCO_AP50),
        ('AP75', COCO_AP75),
        ('APs', COCO_APS),
        ('APm', COCO_APM),
        ('APl', COCO_APL),
    )
    for metric, index in metric_to_index:
        box[metric] = stats[index]
    return results


def _coco_eval_to_mask_results(coco_eval):
    """Convert a COCO eval object into the mask results structure.

    When `coco_eval` is None the placeholder results from
    `_empty_mask_results()` are returned unchanged; otherwise each metric is
    filled from the matching index of `coco_eval.stats`.
    """
    results = _empty_mask_results()
    if coco_eval is None:
        return results
    stats = coco_eval.stats
    mask = results['mask']
    metric_to_index = (
        ('AP', COCO_AP),
        ('AP50', COCO_AP50),
        ('AP75', COCO_AP75),
        ('APs', COCO_APS),
        ('APm', COCO_APM),
        ('APl', COCO_APL),
    )
    for metric, index in metric_to_index:
        mask[metric] = stats[index]
    return results


def _coco_eval_to_keypoint_results(coco_eval):
    """Convert a COCO eval object into the keypoint results structure.

    When `coco_eval` is None the placeholder results from
    `_empty_keypoint_results()` are returned unchanged. Keypoint results have
    no 'APs' entry, and APm / APl use the keypoint-specific stats indices.
    """
    results = _empty_keypoint_results()
    if coco_eval is None:
        return results
    stats = coco_eval.stats
    keypoint = results['keypoint']
    metric_to_index = (
        ('AP', COCO_AP),
        ('AP50', COCO_AP50),
        ('AP75', COCO_AP75),
        ('APm', COCO_KPS_APM),
        ('APl', COCO_KPS_APL),
    )
    for metric, index in metric_to_index:
        keypoint[metric] = stats[index]
    return results


def _voc_eval_to_box_results(voc_eval):
    """Return placeholder box results; `voc_eval` is ignored.

    Converting VOC evaluation output is not supported, so the empty results
    structure is returned regardless of the argument.
    """
    placeholder = _empty_box_results()
    return placeholder


def _cs_eval_to_mask_results(cs_eval):
    """Return placeholder mask results; `cs_eval` is ignored.

    Converting Cityscapes evaluation output is not supported, so the empty
    results structure is returned regardless of the argument.
    """
    placeholder = _empty_mask_results()
    return placeholder


def _empty_box_results():
    """Return the box results structure with every metric set to -1."""
    metric_names = ('AP', 'AP50', 'AP75', 'APs', 'APm', 'APl')
    per_metric = OrderedDict((name, -1) for name in metric_names)
    return OrderedDict([('box', per_metric)])


def _empty_mask_results():
    """Return the mask results structure with every metric set to -1."""
    metric_names = ('AP', 'AP50', 'AP75', 'APs', 'APm', 'APl')
    per_metric = OrderedDict((name, -1) for name in metric_names)
    return OrderedDict([('mask', per_metric)])


def _empty_keypoint_results():
    """Return the keypoint results structure with every metric set to -1.

    Unlike the box and mask structures there is no 'APs' entry.
    """
    metric_names = ('AP', 'AP50', 'AP75', 'APm', 'APl')
    per_metric = OrderedDict((name, -1) for name in metric_names)
    return OrderedDict([('keypoint', per_metric)])


def _empty_box_proposal_results():
    """Return the box proposal results structure with every metric set to -1.

    Metrics are the AR / ARs / ARm / ARl values at 100 and then at 1000
    proposals, in that order.
    """
    metric_names = [
        '{}@{}'.format(prefix, limit)
        for limit in (100, 1000)
        for prefix in ('AR', 'ARs', 'ARm', 'ARl')
    ]
    per_metric = OrderedDict((name, -1) for name in metric_names)
    return OrderedDict([('box_proposal', per_metric)])


================================================
FILE: detectron/datasets/voc_dataset_evaluator.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""PASCAL VOC dataset evaluation interface."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np
import os
import shutil
import uuid

from detectron.core.config import cfg
from detectron.datasets.dataset_catalog import get_devkit_dir
from detectron.datasets.voc_eval import voc_eval
from detectron.utils.io import save_object

logger = logging.getLogger(__name__)


def evaluate_boxes(
    json_dataset,
    all_boxes,
    output_dir,
    use_salt=True,
    cleanup=True,
    use_matlab=False
):
    """Write VOC-format detection files and evaluate them.

    Args:
        json_dataset: dataset object handed to the VOC helpers of this module.
        all_boxes: detections, indexed as all_boxes[class][image].
        output_dir: directory that receives the evaluation outputs.
        use_salt: when True a random uuid4 suffix is added to the results
            file names.
        cleanup: when True each results file is copied into `output_dir` and
            then removed from its original location.
        use_matlab: when True the MATLAB evaluation is run in addition to the
            Python one.

    Returns:
        None.
    """
    if use_salt:
        salt = '_{}'.format(str(uuid.uuid4()))
    else:
        salt = ''
    results_files = _write_voc_results_files(json_dataset, all_boxes, salt)
    _do_python_eval(json_dataset, salt, output_dir)
    if use_matlab:
        _do_matlab_eval(json_dataset, salt, output_dir)
    if cleanup:
        # Keep a copy next to the other outputs before deleting the original
        for results_file in results_files:
            shutil.copy(results_file, output_dir)
            os.remove(results_file)
    return None


def _write_voc_results_files(json_dataset, all_boxes, salt):
    """Write one VOC-format detections file per non-background class.

    Args:
        json_dataset: dataset object providing `classes` and `get_roidb()`.
        all_boxes: detections indexed as all_boxes[class][image]; each entry
            is either an empty list or an array whose rows hold the box
            coordinates in columns 0-3 and the score in the last column.
        salt: suffix inserted into the results file names.

    Returns:
        List of the file names that were written.
    """
    image_set_path = voc_info(json_dataset)['image_set_path']
    assert os.path.exists(image_set_path), \
        'Image set path does not exist: {}'.format(image_set_path)
    with open(image_set_path, 'r') as f:
        image_index = [line.strip() for line in f.readlines()]

    # The roidb must list the images in exactly the image set's order
    for pos, entry in enumerate(json_dataset.get_roidb()):
        image_file = os.path.split(entry['image'])[1]
        index = os.path.splitext(image_file)[0]
        assert index == image_index[pos]

    det_line = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
    filenames = []
    for cls_ind, cls in enumerate(json_dataset.classes):
        if cls == '__background__':
            continue
        logger.info('Writing VOC results for: {}'.format(cls))
        template = _get_voc_results_file_template(json_dataset, salt)
        filename = template.format(cls)
        filenames.append(filename)
        cls_boxes = all_boxes[cls_ind]
        assert len(cls_boxes) == len(image_index)
        with open(filename, 'wt') as f:
            for im_ind, index in enumerate(image_index):
                dets = cls_boxes[im_ind]
                if type(dets) == list:
                    # Images without detections are stored as empty lists
                    assert len(dets) == 0, \
                        'dets should be numpy.ndarray or empty list'
                    continue
                # the VOCdevkit expects 1-based indices
                for row in range(dets.shape[0]):
                    f.write(
                        det_line.format(
                            index, dets[row, -1],
                            dets[row, 0] + 1, dets[row, 1] + 1,
                            dets[row, 2] + 1, dets[row, 3] + 1))
    return filenames


def _get_voc_results_file_template(json_dataset, salt):
    """Return the results file path template for `json_dataset`.

    The returned path still contains a '{:s}' placeholder that callers fill
    with the class name, e.g.
    VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt
    """
    info = voc_info(json_dataset)
    filename = ''.join(
        ['comp4', salt, '_det_', info['image_set'], '_{:s}.txt']
    )
    results_dir = os.path.join(
        info['devkit_path'], 'results', 'VOC' + info['year'], 'Main'
    )
    return os.path.join(results_dir, filename)


def _do_python_eval(json_dataset, salt, output_dir='output'):
    """Evaluate the written detections with the Python VOC implementation.

    For every non-background class the AP is computed with `voc_eval`, logged,
    and the recall / precision / AP values are saved to
    `<output_dir>/<class>_pr.pkl`. The mean AP is logged at the end.
    """
    info = voc_info(json_dataset)
    cachedir = os.path.join(info['devkit_path'], 'annotations_cache')
    # The PASCAL VOC metric changed in 2010
    use_07_metric = int(info['year']) < 2010
    logger.info('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    aps = []
    for cls in json_dataset.classes:
        if cls == '__background__':
            continue
        template = _get_voc_results_file_template(json_dataset, salt)
        filename = template.format(cls)
        rec, prec, ap = voc_eval(
            filename,
            info['anno_path'],
            info['image_set_path'],
            cls,
            cachedir,
            ovthresh=0.5,
            use_07_metric=use_07_metric)
        aps.append(ap)
        logger.info('AP for {} = {:.4f}'.format(cls, ap))
        save_object(
            {'rec': rec, 'prec': prec, 'ap': ap},
            os.path.join(output_dir, cls + '_pr.pkl'))

    logger.info('Mean AP = {:.4f}'.format(np.mean(aps)))
    logger.info('~~~~~~~~')
    logger.info('Results:')
    for ap in aps:
        logger.info('{:.3f}'.format(ap))
    logger.info('{:.3f}'.format(np.mean(aps)))
    logger.info('~~~~~~~~')
    logger.info('')
    for line in (
        '----------------------------------------------------------',
        'Results computed with the **unofficial** Python eval code.',
        'Results should be very close to the official MATLAB code.',
        'Use `./tools/reval.py --matlab ...` for your paper.',
        '-- Thanks, The Management',
        '----------------------------------------------------------',
    ):
        logger.info(line)


def _do_matlab_eval(json_dataset, salt, output_dir='output'):
    """Run the MATLAB VOC evaluation wrapper through a shell command.

    The command changes into the VOCdevkit-matlab-wrapper directory under
    cfg.ROOT_DIR and invokes the executable named by cfg.MATLAB on
    `voc_eval(...)`. The command's exit status is not checked.
    """
    import subprocess
    banner = '-----------------------------------------------------'
    logger.info(banner)
    logger.info('Computing results with the official MATLAB eval code.')
    logger.info(banner)
    info = voc_info(json_dataset)
    wrapper_dir = os.path.join(
        cfg.ROOT_DIR, 'detectron', 'datasets', 'VOCdevkit-matlab-wrapper')
    matlab_call = (
        'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"'.format(
            info['devkit_path'], 'comp4' + salt, info['image_set'],
            output_dir)
    )
    cmd = ''.join([
        'cd {} && '.format(wrapper_dir),
        '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB),
        '-r "dbstop if error; ',
        matlab_call,
    ])
    logger.info('Running:\n{}'.format(cmd))
    subprocess.call(cmd, shell=True)


def voc_info(json_dataset):
    """Derive the VOC year, image set and devkit paths from a dataset name.

    The year is taken from characters 4-7 of `json_dataset.name` and the image
    set from character 9 onwards (e.g. a name shaped like 'voc_2007_test').

    Returns:
        dict with keys 'year', 'image_set', 'devkit_path', 'anno_path' (which
        keeps a '{:s}' placeholder for the image name) and 'image_set_path'.
    """
    name = json_dataset.name
    year, image_set = name[4:8], name[9:]
    devkit_path = get_devkit_dir(name)
    assert os.path.exists(devkit_path), \
        'Devkit directory {} not found'.format(devkit_path)
    voc_root = os.path.join(devkit_path, 'VOC' + year)
    return dict(
        year=year,
        image_set=image_set,
        devkit_path=devkit_path,
        anno_path=os.path.join(voc_root, 'Annotations', '{:s}.xml'),
        image_set_path=os.path.join(
            voc_root, 'ImageSets', 'Main', image_set + '.txt'))


================================================
FILE: detectron/datasets/voc_eval.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------

"""Python implementation of the PASCAL VOC devkit's AP evaluation code."""

import logging
import numpy as np
import os
import xml.etree.ElementTree as ET

from detectron.utils.io import load_object
from detectron.utils.io import save_object

logger = logging.getLogger(__name__)


def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Returns a list with one dict per <object> element, holding its 'name',
    'pose', 'truncated' and 'difficult' flags (as ints) and 'bbox' as
    [xmin, ymin, xmax, ymax] ints.
    """
    def _text(node, tag):
        return node.find(tag).text

    annotations = []
    for node in ET.parse(filename).findall('object'):
        bndbox = node.find('bndbox')
        annotations.append({
            'name': _text(node, 'name'),
            'pose': _text(node, 'pose'),
            'truncated': int(_text(node, 'truncated')),
            'difficult': int(_text(node, 'difficult')),
            'bbox': [
                int(_text(bndbox, corner))
                for corner in ('xmin', 'ymin', 'xmax', 'ymax')
            ],
        })
    return annotations


def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP from recall and precision arrays.

    With `use_07_metric` set, the VOC07 11-point method is used: the best
    precision at recall >= t is averaged over t = 0.0, 0.1, ..., 1.0.
    Otherwise (the default) AP is the area under the monotonically
    non-increasing envelope of the precision/recall curve.
    """
    if use_07_metric:
        # 11 point metric
        ap = 0.
        for thresh in np.arange(0., 1.1, 0.1):
            reached = rec >= thresh
            # Precision counts as zero when recall never reaches the threshold
            p = np.max(prec[reached]) if np.sum(reached) != 0 else 0
            ap = ap + p / 11.
        return ap

    # Add sentinel values at both ends of the curve
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # Precision envelope: sweep right to left keeping the running maximum
    for k in reversed(range(1, mpre.size)):
        mpre[k - 1] = np.maximum(mpre[k - 1], mpre[k])

    # Only the points where recall changes contribute to the area
    steps = np.where(mrec[1:] != mrec[:-1])[0]

    # sum (\Delta recall) * prec
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                cachedir,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)

    Returns (rec, prec, ap): the cumulative recall and precision arrays, one
    entry per detection in descending confidence order, and the resulting AP.
    A detections file without any detections yields empty rec/prec arrays and
    whatever AP `voc_ap` computes for them.
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    imageset = os.path.splitext(os.path.basename(imagesetfile))[0]
    cachefile = os.path.join(cachedir, imageset + '_annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                logger.info(
                    'Reading annotation for {:d}/{:d}'.format(
                        i + 1, len(imagenames)))
        # save
        logger.info('Saving cached annotations to {:s}'.format(cachefile))
        save_object(recs, cachefile)
    else:
        recs = load_object(cachefile)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        # Difficult objects do not count towards the number of positives
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    # Ignore blank lines so stray newlines cannot break the parsing below
    splitlines = [x.strip().split(' ') for x in lines if x.strip()]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    # With no detections BB is a 1-D empty array and the 2-D indexing below
    # would raise IndexError, so sorting is skipped in that case.
    if len(splitlines) > 0:
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # A match with a difficult ground truth is neither a TP nor a FP
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    # Ground truth already claimed by a higher-scoring det
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap


================================================
FILE: detectron/modeling/FPN.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Functions for using a Feature Pyramid Network (FPN)."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import collections
import numpy as np

from detectron.core.config import cfg
from detectron.modeling.generate_anchors import generate_anchors
from detectron.utils.c2 import const_fill
from detectron.utils.c2 import gauss_fill
from detectron.utils.net import get_group_gn
import detectron.modeling.ResNet as ResNet
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils

# Lowest and highest pyramid levels in the backbone network. For FPN, we assume
# that all networks have 5 spatial reductions, each by a factor of 2. Level 1
# would correspond to the input image, hence it does not make sense to use it.
# The lowest level is the "conv2"-like one, the highest the "conv5"-like one.
LOWEST_BACKBONE_LVL, HIGHEST_BACKBONE_LVL = 2, 5


# ---------------------------------------------------------------------------- #
# FPN with ResNet
# ---------------------------------------------------------------------------- #

def add_fpn_ResNet50_conv5_body(model):
    """Add FPN on top of the body built by `ResNet.add_ResNet50_conv5_body`.

    Returns the result of `add_fpn_onto_conv_body` with all FPN levels kept.
    """
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet50_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet50_conv5
    )


def add_fpn_ResNet50_conv5_P2only_body(model):
    """Add FPN on top of the body built by `ResNet.add_ResNet50_conv5_body`.

    Returns the result of `add_fpn_onto_conv_body` restricted to the finest
    FPN level (P2only=True).
    """
    conv_body_func = ResNet.add_ResNet50_conv5_body
    level_info_func = fpn_level_info_ResNet50_conv5
    return add_fpn_onto_conv_body(
        model, conv_body_func, level_info_func, P2only=True
    )


def add_fpn_ResNet101_conv5_body(model):
    """Add FPN on top of the body built by `ResNet.add_ResNet101_conv5_body`.

    Returns the result of `add_fpn_onto_conv_body` with all FPN levels kept.
    """
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet101_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet101_conv5
    )


def add_fpn_ResNet101_conv5_P2only_body(model):
    """Add FPN on top of the body built by `ResNet.add_ResNet101_conv5_body`.

    Returns the result of `add_fpn_onto_conv_body` restricted to the finest
    FPN level (P2only=True).
    """
    conv_body_func = ResNet.add_ResNet101_conv5_body
    level_info_func = fpn_level_info_ResNet101_conv5
    return add_fpn_onto_conv_body(
        model, conv_body_func, level_info_func, P2only=True
    )


def add_fpn_ResNet152_conv5_body(model):
    """Add FPN on top of the body built by `ResNet.add_ResNet152_conv5_body`.

    Returns the result of `add_fpn_onto_conv_body` with all FPN levels kept.
    """
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet152_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet152_conv5
    )


def add_fpn_ResNet152_conv5_P2only_body(model):
    """Add FPN on top of the body built by `ResNet.add_ResNet152_conv5_body`.

    Returns the result of `add_fpn_onto_conv_body` restricted to the finest
    FPN level (P2only=True).
    """
    conv_body_func = ResNet.add_ResNet152_conv5_body
    level_info_func = fpn_level_info_ResNet152_conv5
    return add_fpn_onto_conv_body(
        model, conv_body_func, level_info_func, P2only=True
    )


# ---------------------------------------------------------------------------- #
# Functions for bolting FPN onto a backbone architecture
# ---------------------------------------------------------------------------- #

def add_fpn_onto_conv_body(
    model, conv_body_func, fpn_level_info_func, P2only=False
):
    """Add the specified conv body to the model and then add FPN levels to it.

    Args:
        model: model that the conv body and the FPN ops are added to.
        conv_body_func: callable that adds the backbone to `model`.
        fpn_level_info_func: zero-argument callable returning the backbone
            level info that is passed on to `add_fpn`.
        P2only: when True only the last (finest) FPN blob and its spatial
            scale are returned instead of the full lists.

    Returns:
        (blobs, dim, spatial_scales) as produced by `add_fpn`, reduced to the
        finest level when `P2only` is set.
    """
    # Note: the level lists are in reversed (coarsest first) order, e.g.
    #   blobs:          [fpn5, fpn4, fpn3, fpn2]
    #   backbone dims:  [2048, 1024, 512, 256]
    #   spatial scales: [1/32, 1/16, 1/8, 1/4]
    conv_body_func(model)
    level_info = fpn_level_info_func()
    blobs_fpn, dim_fpn, spatial_scales_fpn = add_fpn(model, level_info)

    if not P2only:
        # use all levels
        return blobs_fpn, dim_fpn, spatial_scales_fpn
    # use only the finest level, which is stored last
    return blobs_fpn[-1], dim_fpn, spatial_scales_fpn[-1]


def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper.

    Args:
        model: model that the FPN ops are added to.
        fpn_level_info: backbone description providing `blobs`, `dims` and
            `spatial_scales` sequences; index 0 is treated as the coarsest
            backbone level.

    Returns:
        (blobs_fpn, fpn_dim, spatial_scales): the FPN output blobs and their
        spatial scales, ordered coarsest first (extra coarse levels are
        inserted at the front), plus the shared FPN channel dimension taken
        from cfg.FPN.DIM.
    """
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarsest backbone stage (usually the "conv5"-like level)
    # E.g., if the backbone level info defines 4 stages: "conv5",
    # "conv4", ... "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4
    # backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        # ConvGN returns the blob it actually produced; use it from here on
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #

    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #

    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        # Coarser levels go to the front to keep the coarsest-first ordering
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        # The first extra level is computed from the coarsest backbone blob
        # itself; later extra levels chain off the previous extra level.
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                # A ReLU is applied only between consecutive extra levels
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            # Every extra level after the first takes fpn_dim input channels
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales


def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module.

    A 1x1 conv (with GroupNorm when cfg.FPN.USE_GN is set) maps `fpn_lateral`
    from `dim_lateral` to `dim_top` channels, `fpn_top` is upsampled 2x with
    nearest-neighbor upsampling, and the two are summed into `fpn_bottom`.
    """
    # Lateral 1x1 conv; both variants share the same conv settings
    if cfg.FPN.ZERO_INIT_LATERAL:
        lateral_weight_init = const_fill(0.0)
    else:
        lateral_weight_init = ('XavierFill', {})
    lateral_name = fpn_bottom + '_lateral'
    conv_kwargs = dict(
        dim_in=dim_lateral,
        dim_out=dim_top,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=lateral_weight_init,
        bias_init=const_fill(0.0)
    )
    if cfg.FPN.USE_GN:
        # use GroupNorm
        lat = model.ConvGN(
            fpn_lateral,
            lateral_name,
            group_gn=get_group_gn(dim_top),
            **conv_kwargs
        )
    else:
        lat = model.Conv(fpn_lateral, lateral_name, **conv_kwargs)
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)


def get_min_max_levels():
    """The min and max FPN levels required for supporting RPN and/or RoI
    transform operations on multiple FPN levels.

    Returns (min_level, max_level). Without multilevel RPN or RoIs these are
    the backbone limits; with only one of them enabled, that component's
    configured range; with both, the union of the two ranges.
    """
    multilevel_rpn = cfg.FPN.MULTILEVEL_RPN
    multilevel_rois = cfg.FPN.MULTILEVEL_ROIS
    if multilevel_rpn and multilevel_rois:
        min_level = min(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.ROI_MIN_LEVEL)
        max_level = max(cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.ROI_MAX_LEVEL)
    elif multilevel_rpn:
        min_level = cfg.FPN.RPN_MIN_LEVEL
        max_level = cfg.FPN.RPN_MAX_LEVEL
    elif multilevel_rois:
        min_level = cfg.FPN.ROI_MIN_LEVEL
        max_level = cfg.FPN.ROI_MAX_LEVEL
    else:
        min_level = LOWEST_BACKBONE_LVL
        max_level = HIGHEST_BACKBONE_LVL
    return min_level, max_level


# ---------------------------------------------------------------------------- #
# RPN with an FPN backbone
# ---------------------------------------------------------------------------- #

def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add the RPN outputs for every FPN level in the configured RPN range.

    model: the model helper that the ops are added to
    blobs_in: per-level input blobs, indexed in reversed order (the entry for
      cfg.FPN.RPN_MAX_LEVEL comes first)
    dim_in: channel dimension passed to every RPN conv
    spatial_scales: per-level spatial scales, in the same reversed order

    Conv weights and biases are created at cfg.FPN.RPN_MIN_LEVEL and reused
    (by blob name) at every other level.
    """
    anchors_per_location = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_hidden = dim_in

    coarsest_lvl = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    finest_lvl = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == coarsest_lvl - finest_lvl + 1
    finest_suffix = str(finest_lvl)

    def _level_conv(lvl, blob, base_name, n_out, kernel, pad):
        # Add one RPN conv for `lvl`: freshly initialized params at the finest
        # level, params shared with the finest level everywhere else.
        out_name = base_name + str(lvl)
        if lvl == finest_lvl:
            # Randomly initialized weights and zeroed biases
            return model.Conv(
                blob,
                out_name,
                dim_in,
                n_out,
                kernel=kernel,
                pad=pad,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
        # Share weights and biases with the finest level
        return model.ConvShared(
            blob,
            out_name,
            dim_in,
            n_out,
            kernel=kernel,
            pad=pad,
            stride=1,
            weight=base_name + finest_suffix + '_w',
            bias=base_name + finest_suffix + '_b'
        )

    for lvl in range(finest_lvl, coarsest_lvl + 1):
        reversed_idx = coarsest_lvl - lvl  # inputs are in reversed order
        level_blob = blobs_in[reversed_idx]
        level_scale = spatial_scales[reversed_idx]
        level_suffix = str(lvl)

        # RPN hidden representation
        hidden = _level_conv(
            lvl, level_blob, 'conv_rpn_fpn', dim_hidden, 3, 1
        )
        model.Relu(hidden, hidden)
        # Proposal classification scores
        cls_logits = _level_conv(
            lvl, hidden, 'rpn_cls_logits_fpn', anchors_per_location, 1, 0
        )
        # Proposal bbox regression deltas
        bbox_pred = _level_conv(
            lvl, hidden, 'rpn_bbox_pred_fpn', 4 * anchors_per_location, 1, 0
        )

        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  OR
        #  2) training for Faster R-CNN
        # Otherwise (== training for RPN only), proposals are not needed
        if model.train and not cfg.MODEL.FASTER_RCNN:
            continue

        level_anchors = generate_anchors(
            stride=2.**lvl,
            sizes=(
                cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - finest_lvl),
            ),
            aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
        )
        cls_probs = model.net.Sigmoid(
            cls_logits, 'rpn_cls_probs_fpn' + level_suffix
        )
        model.GenerateProposals(
            [cls_probs, bbox_pred, 'im_info'],
            [
                'rpn_rois_fpn' + level_suffix,
                'rpn_roi_probs_fpn' + level_suffix
            ],
            anchors=level_anchors,
            spatial_scale=level_scale
        )


def add_fpn_rpn_losses(model):
    """Add the RPN classification and bbox regression losses for every FPN
    level in [cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL].

    Returns a dict accumulating the loss gradients of all levels.
    """
    all_loss_gradients = {}
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        fpn_suffix = '_fpn' + str(lvl)
        cls_logits_name = 'rpn_cls_logits' + fpn_suffix
        bbox_pred_name = 'rpn_bbox_pred' + fpn_suffix
        labels_name = 'rpn_labels_int32' + fpn_suffix
        cls_loss_name = 'loss_rpn_cls' + fpn_suffix
        bbox_loss_name = 'loss_rpn_bbox' + fpn_suffix

        # Spatially narrow the full-sized RPN label arrays to match the
        # feature map shape
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide' + fpn_suffix, cls_logits_name],
            labels_name
        )
        for key in ('targets', 'inside_weights', 'outside_weights'):
            wide_name = 'rpn_bbox_' + key + '_wide' + fpn_suffix
            narrow_name = 'rpn_bbox_' + key + fpn_suffix
            model.net.SpatialNarrowAs([wide_name, bbox_pred_name], narrow_name)

        cls_loss_scale = (
            model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
            cfg.TRAIN.IMS_PER_BATCH
        )
        cls_loss = model.net.SigmoidCrossEntropyLoss(
            [cls_logits_name, labels_name],
            cls_loss_name,
            normalize=0,
            scale=cls_loss_scale
        )
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        bbox_loss_inputs = [
            bbox_pred_name,
            'rpn_bbox_targets' + fpn_suffix,
            'rpn_bbox_inside_weights' + fpn_suffix,
            'rpn_bbox_outside_weights' + fpn_suffix,
        ]
        bbox_loss = model.net.SmoothL1Loss(
            bbox_loss_inputs,
            bbox_loss_name,
            beta=1. / 9.,
            scale=model.GetLossScale(),
        )
        level_gradients = blob_utils.get_loss_gradients(
            model, [cls_loss, bbox_loss]
        )
        all_loss_gradients.update(level_gradients)
        model.AddLosses([cls_loss_name, bbox_loss_name])
    return all_loss_gradients


# ---------------------------------------------------------------------------- #
# Helper functions for working with multilevel FPN RoIs
# ---------------------------------------------------------------------------- #

def map_rois_to_fpn_levels(rois, k_min, k_max):
    """Assign each RoI to an FPN level using the heuristic from the FPN paper.

    The level is derived from the square root of the RoI area (as computed by
    box_utils.boxes_area) relative to the canonical scale/level in cfg.FPN,
    and the result is clipped to [k_min, k_max].
    """
    roi_scales = np.sqrt(box_utils.boxes_area(rois))
    canonical_scale = cfg.FPN.ROI_CANONICAL_SCALE  # default: 224
    canonical_lvl = cfg.FPN.ROI_CANONICAL_LEVEL  # default: 4

    # Eqn.(1) in FPN paper; the small epsilon keeps log2 away from zero
    log_ratio = np.log2(roi_scales / canonical_scale + 1e-6)
    unclipped_lvls = np.floor(canonical_lvl + log_ratio)
    return np.clip(unclipped_lvls, k_min, k_max)


def add_multilevel_roi_blobs(
    blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max
):
    """Split RoIs by FPN level and store the per-level blobs in the blobs dict.

    blobs: a dict mapping from blob name to numpy ndarray; it is updated in
      place with one '<blob_prefix>_fpn<lvl>' entry per level plus a
      '<blob_prefix>_idx_restore_int32' entry
    blob_prefix: name prefix to use for the FPN blobs
    rois: the source rois as a 2D numpy array of shape (N, 5) where each row is
      an roi and the columns encode (batch_idx, x1, y1, x2, y2)
    target_lvls: numpy array of shape (N, ) indicating which FPN level each roi
      in rois should be assigned to
    lvl_min: the finest (highest resolution) FPN level (e.g., 2)
    lvl_max: the coarsest (lowest resolution) FPN level (e.g., 6)

    The restore index maps the level-by-level stacking of the rois back to
    their original order.
    """
    # Seed both lists with empty arrays so the concatenations below are
    # well-defined even when there are no levels or no rois at all
    idx_chunks = [np.empty((0, ))]
    roi_chunks = [np.zeros((0, 5), dtype=np.float32)]  # for assert
    for lvl in range(lvl_min, lvl_max + 1):
        lvl_idx = np.where(target_lvls == lvl)[0]
        lvl_rois = rois[lvl_idx, :]
        blobs[blob_prefix + '_fpn' + str(lvl)] = lvl_rois
        idx_chunks.append(lvl_idx)
        roi_chunks.append(lvl_rois)
    stacked_order = np.concatenate(idx_chunks)
    stacked_rois = np.vstack(roi_chunks)
    restore_idx = np.argsort(stacked_order).astype(np.int32, copy=False)
    blobs[blob_prefix + '_idx_restore_int32'] = restore_idx
    # Sanity check that restore order is correct
    assert (stacked_rois[restore_idx] == rois).all()


# ---------------------------------------------------------------------------- #
# FPN level info for stages 5, 4, 3, 2 for select models (more can be added)
# ---------------------------------------------------------------------------- #

# Record describing the backbone outputs used to build FPN. The
# fpn_level_info_* functions fill each field with a tuple holding one entry per
# backbone stage: blob names, channel dims, and spatial scales.
FpnLevelInfo = collections.namedtuple(
    'FpnLevelInfo', ('blobs', 'dims', 'spatial_scales')
)


def fpn_level_info_ResNet50_conv5():
    """Return the FpnLevelInfo for ResNet50 conv5: the res5..res2 output blob
    names with their channel dims and spatial scales, listed from res5 down to
    res2.
    """
    stage_blobs = ('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )


def fpn_level_info_ResNet101_conv5():
    """Return the FpnLevelInfo for ResNet101 conv5: the res5..res2 output blob
    names with their channel dims and spatial scales, listed from res5 down to
    res2.
    """
    stage_blobs = ('res5_2_sum', 'res4_22_sum', 'res3_3_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )


def fpn_level_info_ResNet152_conv5():
    """Return the FpnLevelInfo for ResNet152 conv5: the res5..res2 output blob
    names with their channel dims and spatial scales, listed from res5 down to
    res2.
    """
    stage_blobs = ('res5_2_sum', 'res4_35_sum', 'res3_7_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )


================================================
FILE: detectron/modeling/ResNet.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Implements ResNet and ResNeXt.

See: https://arxiv.org/abs/1512.03385, https://arxiv.org/abs/1611.05431.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg
from detectron.utils.net import get_group_gn


# ---------------------------------------------------------------------------- #
# Bits for specific architectures (ResNet50, ResNet101, ...)
# ---------------------------------------------------------------------------- #


def add_ResNet50_conv4_body(model):
    """Add a ResNet50 body up through res4 (block counts 3, 4, 6)."""
    block_counts = (3, 4, 6)
    return add_ResNet_convX_body(model, block_counts)


def add_ResNet50_conv5_body(model):
    """Add a ResNet50 body up through res5 (block counts 3, 4, 6, 3)."""
    block_counts = (3, 4, 6, 3)
    return add_ResNet_convX_body(model, block_counts)


def add_ResNet101_conv4_body(model):
    """Add a ResNet101 body up through res4 (block counts 3, 4, 23)."""
    block_counts = (3, 4, 23)
    return add_ResNet_convX_body(model, block_counts)


def add_ResNet101_conv5_body(model):
    """Add a ResNet101 body up through res5 (block counts 3, 4, 23, 3)."""
    block_counts = (3, 4, 23, 3)
    return add_ResNet_convX_body(model, block_counts)


def add_ResNet152_conv5_body(model):
    """Add a ResNet152 body up through res5 (block counts 3, 8, 36, 3)."""
    block_counts = (3, 8, 36, 3)
    return add_ResNet_convX_body(model, block_counts)


# ---------------------------------------------------------------------------- #
# Generic ResNet components
# ---------------------------------------------------------------------------- #


def add_stage(
    model,
    prefix,
    blob_in,
    n,
    dim_in,
    dim_out,
    dim_inner,
    dilation,
    stride_init=2
):
    """Stack n residual blocks to form one ResNet stage.

    Blocks are named '<prefix>_<index>' (e.g., prefix = res2). Returns the
    output blob of the last block together with its channel dimension; when
    n is 0 the inputs blob_in and dim_in are returned unchanged.
    """
    last_idx = n - 1
    cur_blob, cur_dim = blob_in, dim_in
    for block_idx in range(n):
        block_prefix = '{}_{}'.format(prefix, block_idx)
        cur_blob = add_residual_block(
            model,
            block_prefix,
            cur_blob,
            cur_dim,
            dim_out,
            dim_inner,
            dilation,
            stride_init,
            # Not using inplace for the last block;
            # it may be fetched externally or used by FPN
            inplace_sum=block_idx != last_idx
        )
        # Every block after the first one sees dim_out channels on its input
        cur_dim = dim_out
    return cur_blob, cur_dim


def add_ResNet_convX_body(model, block_counts):
    """Add a ResNet body from input data up through the res5 (aka conv5) stage.

    The final res5/conv5 stage is added only when block_counts has four
    entries (hence convX, where X = 4 or 5). Returns a tuple of
    (output blob, output channel dim, spatial scale).
    """
    freeze_at = cfg.TRAIN.FREEZE_AT
    assert freeze_at in [0, 2, 3, 4, 5]

    # add the stem (by default, conv1 and pool1 with bn; can support gn)
    stem_func = globals()[cfg.RESNETS.STEM_FUNC]
    blob, dim_in = stem_func(model, 'data')

    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
    (n1, n2, n3) = block_counts[:3]
    # (prefix, num blocks, output dim, bottleneck width multiplier, stage id)
    front_stages = (
        ('res2', n1, 256, 1, 2),
        ('res3', n2, 512, 2, 3),
        ('res4', n3, 1024, 4, 4),
    )
    for prefix, num_blocks, dim_out, width_mult, stage_id in front_stages:
        blob, dim_in = add_stage(
            model, prefix, blob, num_blocks, dim_in, dim_out,
            dim_bottleneck * width_mult, 1
        )
        if freeze_at == stage_id:
            model.StopGradient(blob, blob)

    if len(block_counts) != 4:
        # conv4 body: stop after res4
        return blob, dim_in, 1. / 16.

    n4 = block_counts[3]
    blob, dim_in = add_stage(
        model, 'res5', blob, n4, dim_in, 2048, dim_bottleneck * 8,
        cfg.RESNETS.RES5_DILATION
    )
    if freeze_at == 5:
        model.StopGradient(blob, blob)
    return blob, dim_in, 1. / 32. * cfg.RESNETS.RES5_DILATION


def add_ResNet_roi_conv5_head(model, blob_in, dim_in, spatial_scale):
    """Add an RoI feature transformation (e.g., RoI pooling) into 'pool5',
    followed by a three-block res5/conv5 head applied to each RoI and a 7x7
    average pool. Returns the pooled blob and its dimension (2048).
    """
    # TODO(rbg): This contains Fast R-CNN specific config options making it non-
    # reusable; make this more generic with model-specific wrappers
    roi_resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    model.RoIFeatureTransform(
        blob_in,
        'pool5',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_resolution,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
    # The initial stride grows with the RoI resolution relative to 7
    stride_init = int(cfg.FAST_RCNN.ROI_XFORM_RESOLUTION / 7)
    res5_out, _ = add_stage(
        model, 'res5', 'pool5', 3, dim_in, 2048, dim_bottleneck * 8, 1,
        stride_init
    )
    pooled = model.AveragePool(res5_out, 'res5_pool', kernel=7)
    return pooled, 2048


def add_residual_block(
    model,
    prefix,
    blob_in,
    dim_in,
    dim_out,
    dim_inner,
    dilation,
    stride_init=2,
    inplace_sum=False
):
    """Add one residual block (transformation + shortcut -> sum -> ReLU).

    prefix = res<stage>_<sub_stage>, e.g., res2_3. The transformation and
    shortcut functions are looked up by name from cfg.RESNETS. When
    inplace_sum is False the sum is written to '<prefix>_sum'; otherwise it
    overwrites the transformation blob. Returns the ReLU output.
    """
    # Max pooling is performed prior to the first stage (which is uniquely
    # distinguished by dim_in = 64), thus we keep stride = 1 for the first stage
    if dim_in != dim_out and dim_in != 64 and dilation == 1:
        stride = stride_init
    else:
        stride = 1

    # transformation blob
    transform_func = globals()[cfg.RESNETS.TRANS_FUNC]
    transformed = transform_func(
        model,
        blob_in,
        dim_in,
        dim_out,
        stride,
        prefix,
        dim_inner,
        group=cfg.RESNETS.NUM_GROUPS,
        dilation=dilation
    )

    # shortcut function: by default using bn; support gn
    shortcut_func = globals()[cfg.RESNETS.SHORTCUT_FUNC]
    shortcut = shortcut_func(model, prefix, blob_in, dim_in, dim_out, stride)

    # sum -> ReLU
    sum_out = transformed if inplace_sum else prefix + '_sum'
    summed = model.net.Sum([transformed, shortcut], sum_out)
    return model.Relu(summed, summed)


# ------------------------------------------------------------------------------
# various shortcuts (may expand and may consider a new helper)
# ------------------------------------------------------------------------------


def basic_bn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
    """Shortcut branch for a pre-trained network that used BN; an
    AffineChannel op replaces BN during fine-tuning.

    Returns blob_in itself when the input and output dims match; otherwise
    adds a bias-free 1x1 conv ('<prefix>_branch1') followed by AffineChannel.
    """
    if dim_in != dim_out:
        projected = model.Conv(
            blob_in,
            prefix + '_branch1',
            dim_in,
            dim_out,
            kernel=1,
            stride=stride,
            no_bias=1
        )
        return model.AffineChannel(
            projected, prefix + '_branch1_bn', dim=dim_out
        )
    # Identity shortcut: no projection needed
    return blob_in


def basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
    """Shortcut branch using GroupNorm.

    Returns blob_in itself when the input and output dims match; otherwise
    adds a 1x1 ConvGN projection named with the '<prefix>_branch1' prefix.
    """
    if dim_in != dim_out:
        # output name is prefix + '_branch1_gn'
        return model.ConvGN(
            blob_in,
            prefix + '_branch1',
            dim_in,
            dim_out,
            kernel=1,
            group_gn=get_group_gn(dim_out),
            stride=stride,
            pad=0,
            group=1,
        )
    # Identity shortcut: no projection needed
    return blob_in


# ------------------------------------------------------------------------------
# various stems (may expand and may consider a new helper)
# ------------------------------------------------------------------------------


def basic_bn_stem(model, data, **kwargs):
    """Add a basic ResNet stem for a pre-trained network that used BN; an
    AffineChannel op replaces BN during fine-tuning.

    The stem is conv1 (7x7, stride 2, no bias) -> AffineChannel -> ReLU ->
    pool1 (3x3 max pool, stride 2). Extra keyword arguments are accepted but
    unused. Returns the pool1 blob and the stem dimension (64).
    """
    stem_dim = 64
    blob = model.Conv(
        data, 'conv1', 3, stem_dim, 7, pad=3, stride=2, no_bias=1
    )
    blob = model.AffineChannel(blob, 'res_conv1_bn', dim=stem_dim, inplace=True)
    blob = model.Relu(blob, blob)
    blob = model.MaxPool(blob, 'pool1', kernel=3, pad=1, stride=2)
    return blob, stem_dim


def basic_gn_stem(model, data, **kwargs):
    """Add a basic ResNet stem (using GN).

    The stem is conv1 (7x7 ConvGN, stride 2) -> ReLU -> pool1 (3x3 max pool,
    stride 2). Extra keyword arguments are accepted but unused. Returns the
    pool1 blob and the stem dimension (64).
    """
    stem_dim = 64
    num_gn_groups = get_group_gn(stem_dim)
    blob = model.ConvGN(
        data, 'conv1', 3, stem_dim, 7, group_gn=num_gn_groups, pad=3, stride=2
    )
    blob = model.Relu(blob, blob)
    blob = model.MaxPool(blob, 'pool1', kernel=3, pad=1, stride=2)
    return blob, stem_dim


# ------------------------------------------------------------------------------
# various transformations (may expand and may consider a new helper)
# ------------------------------------------------------------------------------


def bottleneck_transformation(
    model,
    blob_in,
    dim_in,
    dim_out,
    stride,
    prefix,
    dim_inner,
    dilation=1,
    group=1
):
    """Add a bottleneck transformation (1x1 -> 3x3 -> 1x1 ConvAffine ops,
    named '<prefix>_branch2a/b/c') to the model and return its output blob.
    """
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    if cfg.RESNETS.STRIDE_1X1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    # conv 1x1 -> BN -> ReLU
    branch = model.ConvAffine(
        blob_in,
        prefix + '_branch2a',
        dim_in,
        dim_inner,
        kernel=1,
        stride=stride_1x1,
        pad=0,
        inplace=True
    )
    branch = model.Relu(branch, branch)

    # conv 3x3 -> BN -> ReLU
    branch = model.ConvAffine(
        branch,
        prefix + '_branch2b',
        dim_inner,
        dim_inner,
        kernel=3,
        stride=stride_3x3,
        pad=1 * dilation,
        dilation=dilation,
        group=group,
        inplace=True
    )
    branch = model.Relu(branch, branch)

    # conv 1x1 -> BN (no ReLU)
    # NB: for now this AffineChannel op cannot be in-place due to a bug in C2
    # gradient computation for graphs like this
    return model.ConvAffine(
        branch,
        prefix + '_branch2c',
        dim_inner,
        dim_out,
        kernel=1,
        stride=1,
        pad=0,
        inplace=False
    )


def bottleneck_gn_transformation(
    model,
    blob_in,
    dim_in,
    dim_out,
    stride,
    prefix,
    dim_inner,
    dilation=1,
    group=1
):
    """Add a bottleneck transformation with GroupNorm (1x1 -> 3x3 -> 1x1
    ConvGN ops, named with the '<prefix>_branch2a/b/c' prefixes) to the model
    and return its output blob.
    """
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    if cfg.RESNETS.STRIDE_1X1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    # conv 1x1 -> GN -> ReLU
    branch = model.ConvGN(
        blob_in,
        prefix + '_branch2a',
        dim_in,
        dim_inner,
        kernel=1,
        group_gn=get_group_gn(dim_inner),
        stride=stride_1x1,
        pad=0,
    )
    branch = model.Relu(branch, branch)

    # conv 3x3 -> GN -> ReLU
    branch = model.ConvGN(
        branch,
        prefix + '_branch2b',
        dim_inner,
        dim_inner,
        kernel=3,
        group_gn=get_group_gn(dim_inner),
        stride=stride_3x3,
        pad=1 * dilation,
        dilation=dilation,
        group=group,
    )
    branch = model.Relu(branch, branch)

    # conv 1x1 -> GN (no ReLU)
    return model.ConvGN(
        branch,
        prefix + '_branch2c',
        dim_inner,
        dim_out,
        kernel=1,
        group_gn=get_group_gn(dim_out),
        stride=1,
        pad=0,
    )


================================================
FILE: detectron/modeling/VGG16.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""VGG16 from https://arxiv.org/abs/1409.1556."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg


def add_VGG16_conv5_body(model):
    """Add the VGG16 conv body from 'data' up through conv5_3.

    Each stage is a run of 3x3 conv + in-place ReLU pairs; stages 1-4 are
    followed by a 2x2 max pool, and gradients are stopped at pool2. Returns
    (conv5_3 ReLU output blob, 512, 1/16 spatial scale).
    """
    # (stage index, number of conv layers, output channels)
    stage_specs = (
        (1, 2, 64),
        (2, 2, 128),
        (3, 3, 256),
        (4, 3, 512),
        (5, 3, 512),
    )
    last_stage = stage_specs[-1][0]
    cur_blob, cur_dim = 'data', 3
    blob_out = None
    for stage, num_convs, stage_dim in stage_specs:
        for conv_idx in range(1, num_convs + 1):
            conv_name = 'conv{}_{}'.format(stage, conv_idx)
            model.Conv(cur_blob, conv_name, cur_dim, stage_dim, 3, pad=1, stride=1)
            blob_out = model.Relu(conv_name, conv_name)
            cur_blob, cur_dim = conv_name, stage_dim
        if stage == last_stage:
            # No pooling after conv5_3
            continue
        pool_name = 'pool{}'.format(stage)
        model.MaxPool(cur_blob, pool_name, kernel=2, pad=0, stride=2)
        if stage == 2:
            model.StopGradient(pool_name, pool_name)
        cur_blob = pool_name
    return blob_out, 512, 1. / 16.


def add_VGG16_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    """Add the VGG16 RoI head: a 7x7 RoI feature transform into 'pool5'
    followed by fc6 and fc7 (4096-d each, with in-place ReLUs). Returns the
    fc7 ReLU output blob and its dimension (4096).
    """
    roi_resolution = 7
    fc_dim = 4096
    model.RoIFeatureTransform(
        blob_in,
        'pool5',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_resolution,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    model.FC('pool5', 'fc6', dim_in * roi_resolution * roi_resolution, fc_dim)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', fc_dim, fc_dim)
    head_out = model.Relu('fc7', 'fc7')
    return head_out, fc_dim


================================================
FILE: detectron/modeling/VGG_CNN_M_1024.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""VGG_CNN_M_1024 from https://arxiv.org/abs/1405.3531."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg


def add_VGG_CNN_M_1024_conv5_body(model):
    """Add the VGG_CNN_M_1024 conv body from 'data' up through conv5.

    conv1 and conv2 are each followed by ReLU, LRN and a 3x3 max pool;
    gradients are stopped at pool1. Returns (conv5 ReLU output blob, 512,
    1/16 spatial scale).
    """
    lrn_kwargs = dict(size=5, alpha=0.0005, beta=0.75, bias=2.)
    pool_kwargs = dict(kernel=3, pad=0, stride=2)

    model.Conv('data', 'conv1', 3, 96, 7, pad=0, stride=2)
    model.Relu('conv1', 'conv1')
    model.LRN('conv1', 'norm1', **lrn_kwargs)
    model.MaxPool('norm1', 'pool1', **pool_kwargs)
    # No updates at conv1 and below (norm1 and pool1 have no params,
    # so we can stop gradients before them, too)
    model.StopGradient('pool1', 'pool1')

    model.Conv('pool1', 'conv2', 96, 256, 5, pad=0, stride=2)
    model.Relu('conv2', 'conv2')
    model.LRN('conv2', 'norm2', **lrn_kwargs)
    model.MaxPool('norm2', 'pool2', **pool_kwargs)

    # conv3..conv5: 3x3 convs with 512 outputs, each followed by in-place ReLU
    cur_blob, cur_dim = 'pool2', 256
    blob_out = None
    for conv_idx in (3, 4, 5):
        conv_name = 'conv{}'.format(conv_idx)
        model.Conv(cur_blob, conv_name, cur_dim, 512, 3, pad=1, stride=1)
        blob_out = model.Relu(conv_name, conv_name)
        cur_blob, cur_dim = conv_name, 512
    return blob_out, 512, 1. / 16.


def add_VGG_CNN_M_1024_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    """Add the VGG_CNN_M_1024 RoI head: a 6x6 RoI feature transform into
    'pool5' followed by fc6 (4096-d) and fc7 (1024-d), each with an in-place
    ReLU. Returns the fc7 ReLU output blob and its dimension (1024).
    """
    roi_resolution = 6
    fc6_dim = 4096
    fc7_dim = 1024
    model.RoIFeatureTransform(
        blob_in,
        'pool5',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_resolution,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    model.FC('pool5', 'fc6', dim_in * roi_resolution * roi_resolution, fc6_dim)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', fc6_dim, fc7_dim)
    head_out = model.Relu('fc7', 'fc7')
    return head_out, fc7_dim


================================================
FILE: detectron/modeling/__init__.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################


================================================
FILE: detectron/modeling/detector.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Defines DetectionModelHelper, the class that represents a Detectron model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import logging

from caffe2.python import cnn
from caffe2.python import core
from caffe2.python import workspace
from caffe2.python.modeling import initializers
from caffe2.python.modeling.parameter_info import ParameterTags

from detectron.core.config import cfg
from detectron.ops.collect_and_distribute_fpn_rpn_proposals \
    import CollectAndDistributeFpnRpnProposalsOp
from detectron.ops.generate_proposal_labels import GenerateProposalLabelsOp
from detectron.ops.generate_proposals import GenerateProposalsOp
import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data
import detectron.utils.c2 as c2_utils

logger = logging.getLogger(__name__)


class DetectionModelHelper(cnn.CNNModelHelper):
    def __init__(self, **kwargs):
        """Create the helper, consuming the detection-specific kwargs.

        'train' (default False) and 'num_classes' (required; must be > 0)
        are stored on the instance and removed from kwargs. All remaining
        keyword arguments pass through to CNNModelHelper, with 'order' and
        'cudnn_exhaustive_search' forced to fixed values.
        """
        # Strip the args that are specific to DetectionModelHelper so they
        # are not forwarded to CNNModelHelper
        self.train = kwargs.pop('train', False)
        self.num_classes = kwargs.pop('num_classes', -1)
        assert self.num_classes > 0, 'num_classes must be > 0'

        kwargs['order'] = 'NCHW'
        # Pin cudnn_exhaustive_search to False instead of trusting the
        # CNNModelHelper default (which could change). The detection code
        # uses variable size inputs that might not play nicely with
        # cudnn_exhaustive_search.
        kwargs['cudnn_exhaustive_search'] = False
        super(DetectionModelHelper, self).__init__(**kwargs)

        # Net execution settings come from the global config
        net_proto = self.net.Proto()
        net_proto.type = cfg.MODEL.EXECUTION_TYPE
        net_proto.num_workers = cfg.NUM_GPUS * 4

        # Remembered so that RestorePreviousUseCudnn can undo DisableCudnn
        self.prev_use_cudnn = self.use_cudnn

        self.roi_data_loader = None
        self.losses = []
        self.metrics = []
        self.do_not_update_params = []  # Params on this list are not updated
        self.gn_params = []  # Params on this list are GroupNorm parameters

    def TrainableParams(self, gpu_id=-1):
        """Get the blob names for all trainable parameters, possibly filtered by
        GPU id.

        A parameter is trainable when it has a gradient (it is a key of
        self.param_to_grad) and it is not listed in
        self.do_not_update_params.

        gpu_id: -1 (the default) returns the trainable parameters of every
            GPU; any other value keeps only parameters whose name starts
            with the scope prefix 'gpu_<gpu_id>/'.

        Returns a list that preserves the order of self.params.
        """
        if gpu_id == -1:
            scope_prefix = None
        else:
            # Include the scope separator in the prefix. Matching on the bare
            # 'gpu_1' would also select 'gpu_10/...', 'gpu_11/...', etc. when
            # more than 10 GPUs are in use.
            scope_prefix = 'gpu_{}/'.format(gpu_id)

        return [
            p for p in self.params
            if (
                p in self.param_to_grad and   # p has a gradient
                p not in self.do_not_update_params and  # not on the blacklist
                (scope_prefix is None or str(p).startswith(scope_prefix))
            )]

    def AffineChannel(self, blob_in, blob_out, dim, inplace=False):
        """Add an AffineChannel op together with its scale and bias params.

        This affine transformation replaces BN in networks where BN cannot
        be used (e.g., because the minibatch size is too small).

        blob_in: input blob.
        blob_out: output blob; if falsy, a fresh name is requested from
            self.net.NextName(). The parameters are always named
            '<blob_out>_s' (scale) and '<blob_out>_b' (bias).
        dim: length of the scale and bias vectors.
        inplace: if True the op writes its result over blob_in to save
            memory; the parameters are still named after blob_out.

        Returns the output of the AffineChannel op.
        """
        blob_out = blob_out or self.net.NextName()

        # Scale starts at 1 and bias at 0
        scale = self.create_param(
            param_name=blob_out + '_s',
            initializer=initializers.Initializer("ConstantFill", value=1.),
            tags=ParameterTags.WEIGHT,
            shape=[dim, ],
        )
        bias = self.create_param(
            param_name=blob_out + '_b',
            initializer=initializers.Initializer("ConstantFill", value=0.),
            tags=ParameterTags.BIAS,
            shape=[dim, ],
        )

        target = blob_in if inplace else blob_out
        return self.net.AffineChannel([blob_in, scale, bias], target)

    def GenerateProposals(self, blobs_in, blobs_out, anchors, spatial_scale):
        """Op for generating RPN proposals.

        Depending on the GENERATE_PROPOSALS_ON_GPU setting of the TRAIN or
        TEST config section (chosen by self.train), this either adds the
        GenerateProposals net op or wraps GenerateProposalsOp in a Python op.

        blobs_in:
          - 'rpn_cls_probs': 4D tensor of shape (N, A, H, W), where N is the
            number of minibatch images, A is the number of anchors per
            locations, and (H, W) is the spatial size of the prediction grid.
            Each value represents a "probability of object" rating in [0, 1].
          - 'rpn_bbox_pred': 4D tensor of shape (N, 4 * A, H, W) of predicted
            deltas for transformation anchor boxes into RPN proposals.
          - 'im_info': 2D tensor of shape (N, 3) where the three columns encode
            the input image's [height, width, scale]. Height and width are
            for the input to the network, not the original image; scale is the
            scale factor used to scale the original image to the network input
            size.

        blobs_out:
          - 'rpn_rois': 2D tensor of shape (R, 5), for R RPN proposals where the
            five columns encode [batch ind, x1, y1, x2, y2]. The boxes are
            w.r.t. the network input, which is a *scaled* version of the
            original image; these proposals must be scaled by 1 / scale (where
            scale comes from im_info; see above) to transform it back to the
            original input image coordinate system.
          - 'rpn_roi_probs': 1D tensor of objectness probability scores
            (extracted from rpn_cls_probs; see above).

        Returns blobs_out unchanged.
        """
        phase_cfg = cfg['TRAIN' if self.train else 'TEST']

        if not phase_cfg.GENERATE_PROPOSALS_ON_GPU:
            op_name = 'GenerateProposalsOp:' + ','.join(
                str(b) for b in blobs_in
            )
            python_op = self.net.Python(
                GenerateProposalsOp(anchors, spatial_scale, self.train).forward
            )
            # spatial_scale passed to the Python op is only used in
            # convert_pkl_to_pb
            python_op(
                blobs_in, blobs_out, name=op_name, spatial_scale=spatial_scale
            )
            return blobs_out

        pre_nms_top_n = phase_cfg.RPN_PRE_NMS_TOP_N
        post_nms_top_n = phase_cfg.RPN_POST_NMS_TOP_N
        nms_thresh = phase_cfg.RPN_NMS_THRESH
        min_size = float(phase_cfg.RPN_MIN_SIZE)

        # The anchors blob is named after the trailing digit of the first
        # input blob's name, if it has one.
        trailing = str(blobs_in[0])[-1]
        anchors_name = 'anchors'
        if trailing.isdigit():
            lvl = int(trailing)
            if lvl:  # a trailing 0 keeps the plain 'anchors' name
                anchors_name = 'anchors{}'.format(lvl)

        # Make the anchors available on every GPU
        anchors_f32 = anchors.astype(np.float32)
        for gpu_id in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(gpu_id):
                workspace.FeedBlob(
                    'gpu_{}/{}'.format(gpu_id, anchors_name), anchors_f32
                )

        self.net.GenerateProposals(
            blobs_in + [anchors_name],
            blobs_out,
            spatial_scale=spatial_scale,
            pre_nms_topN=pre_nms_top_n,
            post_nms_topN=post_nms_top_n,
            nms_thresh=nms_thresh,
            min_size=min_size,
        )
        return blobs_out

    def GenerateProposalLabels(self, blobs_in):
        """Op for generating training labels for RPN proposals. This is used
        when training RPN jointly with Fast/Mask R-CNN (as in end-to-end
        Faster R-CNN training).

        blobs_in:
          - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals
          - 'roidb': roidb entries that will be labeled
          - 'im_info': See GenerateProposals doc.

        blobs_out:
          - (variable set of blobs): returns whatever blobs are required for
            training the model. It does this by querying the data loader for
            the list of blobs that are needed.

        Returns the list of scoped output blob references.
        """
        # The output blobs depend on the specific model being trained, so
        # they are not known before run-time; ask the data loader for the
        # names and scope each one.
        out_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names(
            is_training=self.train
        )
        scoped_outputs = [core.ScopedBlobReference(n) for n in out_names]

        op_name = 'GenerateProposalLabelsOp:' + ','.join(
            str(b) for b in blobs_in
        )
        python_op = self.net.Python(GenerateProposalLabelsOp().forward)
        python_op(blobs_in, scoped_outputs, name=op_name)
        return scoped_outputs

    def CollectAndDistributeFpnRpnProposals(self):
        """Merge RPN proposals generated at multiple FPN levels and then
        distribute those proposals to their appropriate FPN levels. An anchor
        at one FPN level may predict an RoI that will map to another level,
        hence the need to redistribute the proposals.

        This function assumes standard blob names for input and output blobs.

        Input blobs: [rpn_rois_fpn<min>, ..., rpn_rois_fpn<max>,
                      rpn_roi_probs_fpn<min>, ..., rpn_roi_probs_fpn<max>]
          - rpn_rois_fpn<i> are the RPN proposals for FPN level i; see rpn_rois
            documentation from GenerateProposals.
          - rpn_roi_probs_fpn<i> are the RPN objectness probabilities for FPN
            level i; see rpn_roi_probs documentation from GenerateProposals.

        If used during training, then the input blobs will also include:
          [roidb, im_info] (see GenerateProposalLabels).

        Output blobs: [rois_fpn<min>, ..., rois_fpn<max>, rois,
                       rois_idx_restore]
          - rois_fpn<i> are the RPN proposals for FPN level i
          - rois_idx_restore is a permutation on the concatenation of all
            rois_fpn<i>, i=min...max, such that when applied the RPN RoIs are
            restored to their original order in the input blobs.

        If used during training, then the output blobs will also include:
          [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights].

        Returns whatever the Python op call returns for the output blobs.
        """
        levels = range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1)

        # Inputs: all per-level rois first, then all per-level scores
        input_names = ['rpn_rois_fpn' + str(lvl) for lvl in levels]
        input_names += ['rpn_roi_probs_fpn' + str(lvl) for lvl in levels]
        if self.train:
            input_names += ['roidb', 'im_info']
        scoped_inputs = [core.ScopedBlobReference(n) for n in input_names]

        # Outputs: the set of blobs is provided by the data loader
        output_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names(
            is_training=self.train
        )
        scoped_outputs = [core.ScopedBlobReference(n) for n in output_names]

        op_name = 'CollectAndDistributeFpnRpnProposalsOp:' + ','.join(
            str(b) for b in scoped_inputs
        )
        python_op = self.net.Python(
            CollectAndDistributeFpnRpnProposalsOp(self.train).forward
        )
        return python_op(scoped_inputs, scoped_outputs, name=op_name)

    def DropoutIfTraining(self, blob_in, dropout_rate):
        """Apply in-place dropout to blob_in when training with a positive
        dropout_rate; otherwise return blob_in untouched.
        """
        if not (self.train and dropout_rate > 0):
            return blob_in
        # Dropout writes its output back onto blob_in
        return self.Dropout(
            blob_in, blob_in, ratio=dropout_rate, is_test=False
        )

    def RoIFeatureTransform(
        self,
        blobs_in,
        blob_out,
        blob_rois='rois',
        method='RoIPoolF',
        resolution=7,
        spatial_scale=1. / 16.,
        sampling_ratio=0
    ):
        """Add the specified RoI pooling method. The sampling_ratio argument
        is supported for some, but not all, RoI transform methods.

        RoIFeatureTransform abstracts away:
          - Use of FPN or not
          - Specifics of the transform method

        blobs_in: a single feature blob, or a list of feature blobs (FPN
            case) ordered from the coarsest level to the finest.
        blob_out: name of the output feature blob.
        blob_rois: name of the RoI blob; in the FPN case the per-level blobs
            '<blob_rois>_fpn<lvl>' and '<blob_rois>_idx_restore_int32' are
            used.
        method: 'RoIPoolF' or 'RoIAlign'.
        resolution: used for both pooled_w and pooled_h.
        spatial_scale: a single scale, or in the FPN case a sequence indexed
            in the same (reversed) order as blobs_in.

        Returns the blob holding the transformed features.
        """
        assert method in {'RoIPoolF', 'RoIAlign'}, \
            'Unknown pooling method: {}'.format(method)
        # RoIPoolF gets an extra '_argmax_<out>' output blob
        has_argmax = (method == 'RoIPoolF')

        def _add_xform(feat, rois, out, scale):
            # Adds one RoI transform op; sampling_ratio is ignored for
            # RoIPoolF
            outputs = [out] + (['_argmax_' + out] if has_argmax else [])
            return getattr(self.net, method)(
                [feat, rois], outputs,
                pooled_w=resolution,
                pooled_h=resolution,
                spatial_scale=scale,
                sampling_ratio=sampling_ratio
            )

        if not isinstance(blobs_in, list):
            # Single feature level
            xform_out = _add_xform(
                blobs_in, blob_rois, blob_out, spatial_scale
            )
        else:
            # FPN case: add RoIFeatureTransform to each FPN level
            k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
            k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
            assert len(blobs_in) == k_max - k_min + 1
            level_outputs = []
            for lvl in range(k_min, k_max + 1):
                # blobs_in and spatial_scale are both in reversed order
                rev_idx = k_max - lvl
                level_out = blob_out + '_fpn' + str(lvl)
                level_outputs.append(level_out)
                _add_xform(
                    blobs_in[rev_idx],
                    blob_rois + '_fpn' + str(lvl),
                    level_out,
                    spatial_scale[rev_idx],
                )
            # The pooled features from all levels are concatenated along the
            # batch dimension into a single 4D tensor.
            xform_shuffled, _ = self.net.Concat(
                level_outputs,
                [blob_out + '_shuffled', '_concat_' + blob_out],
                axis=0
            )
            # Unshuffle to match rois from dataloader
            xform_out = self.net.BatchPermutation(
                [xform_shuffled, blob_rois + '_idx_restore_int32'], blob_out
            )

        # Only return the first blob (the transformed features)
        if isinstance(xform_out, tuple):
            return xform_out[0]
        return xform_out

    def ConvShared(
        self,
        blob_in,
        blob_out,
        dim_in,
        dim_out,
        kernel,
        weight=None,
        bias=None,
        **kwargs
    ):
        """Add conv op that shares weights and/or biases with another conv op.

        weight and bias are the existing parameter blobs to reuse. A truthy
        'no_bias' keyword leaves the bias out of the op inputs; the keyword
        itself is never forwarded to the op. dim_in and dim_out are accepted
        but not used by this method. Remaining kwargs go to self.net.Conv.
        """
        # Consume 'no_bias' here so it does not reach the Conv op
        use_bias = not kwargs.pop('no_bias', False)

        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit

        conv_inputs = [blob_in, weight]
        if use_bias:
            conv_inputs.append(bias)

        return self.net.Conv(
            conv_inputs, blob_out, kernel=kernel, order=self.order, **kwargs
        )

    def BilinearInterpolation(
        self, blob_in, blob_out, dim_in, dim_out, up_scale
    ):
        """Bilinear interpolation in space of scale.

        Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale

        Implemented as a ConvTranspose whose weights are filled with a fixed
        bilinear filter and whose bias is zero; both parameters are added to
        self.do_not_update_params. Requires dim_in == dim_out and an even
        up_scale.

        Adapted from the CVPR'15 FCN code.
        See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
        """
        assert dim_in == dim_out
        assert up_scale % 2 == 0, 'Scale should be even'

        def upsample_filt(size):
            # 2D bilinear filter of shape (size, size)
            factor = (size + 1) // 2
            center = factor - 1 if size % 2 == 1 else factor - 0.5
            rows, cols = np.ogrid[:size, :size]
            row_weights = 1 - abs(rows - center) / factor
            col_weights = 1 - abs(cols - center) / factor
            return row_weights * col_weights

        kernel_size = up_scale * 2

        # Only the (i, i) channel pairs get the filter; every other entry
        # stays zero.
        weights = np.zeros(
            (dim_in, dim_out, kernel_size, kernel_size), dtype=np.float32
        )
        weights[np.arange(dim_out), np.arange(dim_in), :, :] = \
            upsample_filt(kernel_size)

        upsampled = self.ConvTranspose(
            blob_in,
            blob_out,
            dim_in,
            dim_out,
            kernel_size,
            stride=int(up_scale),
            pad=int(up_scale / 2),
            weight_init=('GivenTensorFill', {'values': weights}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        # Freeze the weight and bias that ConvTranspose just registered
        self.do_not_update_params.extend(
            [self.weights[-1], self.biases[-1]]
        )
        return upsampled

    def ConvAffine(  # args in the same order of Conv()
        self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,
        group=1, dilation=1,
        weight_init=None,
        bias_init=None,
        suffix='_bn',
        inplace=False
    ):
        """ConvAffine adds a Conv op followed by a AffineChannel op (which
        replaces BN during fine tuning).

        The conv output blob is named prefix and is created with no_bias=1;
        the AffineChannel output is named prefix + suffix. With inplace=True
        the affine op writes over the conv output instead.
        """
        conv_out = self.Conv(
            blob_in, prefix, dim_in, dim_out, kernel,
            stride=stride,
            pad=pad,
            group=group,
            dilation=dilation,
            weight_init=weight_init,
            bias_init=bias_init,
            no_bias=1
        )
        return self.AffineChannel(
            conv_out, prefix + suffix, dim=dim_out, inplace=inplace
        )

    def ConvGN(  # args in the same order of Conv()
        self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,
        group_gn,  # num of groups in gn
        group=1, dilation=1,
        weight_init=None,
        bias_init=None,
        suffix='_gn',
        no_conv_bias=1,
    ):
        """ConvGN adds a Conv op followed by a GroupNorm op,
        including learnable scale/bias (gamma/beta)

        The conv output blob is named prefix and the GroupNorm output
        prefix + suffix. A group_gn below 1 is reset to 1 with a warning.
        The two most recently registered params (GroupNorm's bias and scale)
        are recorded in self.gn_params.
        """
        conv_out = self.Conv(
            blob_in, prefix, dim_in, dim_out, kernel,
            stride=stride,
            pad=pad,
            group=group,
            dilation=dilation,
            weight_init=weight_init,
            bias_init=bias_init,
            no_bias=no_conv_bias
        )

        if group_gn < 1:
            logger.warning(
                'Layer: {} (dim {}): '
                'group_gn < 1; reset to 1.'.format(prefix, dim_in)
            )
            group_gn = 1

        gn_out = self.SpatialGN(
            conv_out,
            prefix + suffix,
            dim_out,
            group=group_gn,  # op's arg name is "group"
            epsilon=cfg.GROUP_NORM.EPSILON,
        )

        # Track gn's bias (last param) and then gn's scale (second to last)
        gn_bias, gn_scale = self.params[-1], self.params[-2]
        self.gn_params.extend([gn_bias, gn_scale])
        return gn_out

    def DisableCudnn(self):
        """Turn off use_cudnn, saving the current value in prev_use_cudnn so
        that RestorePreviousUseCudnn can bring it back.
        """
        self.prev_use_cudnn, self.use_cudnn = self.use_cudnn, False

    def RestorePreviousUseCudnn(self):
        """Swap use_cudnn with prev_use_cudnn, reinstating the saved setting
        while remembering the one that was active until now.
        """
        self.use_cudnn, self.prev_use_cudnn = (
            self.prev_use_cudnn, self.use_cudnn
        )

    def UpdateWorkspaceLr(self, cur_iter, new_lr):
        """Updates the model's current learning rate and the workspace (learning
        rate and update history/momentum blobs).

        Nothing is written when the workspace lr already equals new_lr. A
        change is logged only if its ratio exceeds
        cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD. cur_iter is used for the log
        message only. Returns new_lr.
        """
        # The workspace is the one source of truth for the lr, and the lr is
        # always the same on all GPUs, so reading GPU 0 is enough
        cur_lr = workspace.FetchBlob('gpu_0/lr')[0]
        # There are no type conversions between the lr in Python and the lr in
        # the GPU (both are float32), so exact comparison is ok
        if cur_lr == new_lr:
            return new_lr

        change_ratio = _get_lr_change_ratio(cur_lr, new_lr)
        if change_ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD:
            logger.info(
                'Changing learning rate {:.6f} -> {:.6f} at iter {:d}'.
                format(cur_lr, new_lr, cur_iter))
        self._SetNewLr(cur_lr, new_lr)
        return new_lr

    def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.

        Feeds new_lr into the 'gpu_<i>/lr' blob of every GPU, then rescales
        the momentum history when cfg.SOLVER.SCALE_MOMENTUM is set, cur_lr is
        above 1e-7 and the change ratio exceeds
        cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD.
        """
        lr_blob = np.array([new_lr], dtype=np.float32)
        for gpu_id in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(gpu_id):
                workspace.FeedBlob('gpu_{}/lr'.format(gpu_id), lr_blob)

        change_ratio = _get_lr_change_ratio(cur_lr, new_lr)
        should_scale_momentum = (
            cfg.SOLVER.SCALE_MOMENTUM and
            cur_lr > 1e-7 and
            change_ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD
        )
        if should_scale_momentum:
            self._CorrectMomentum(new_lr / cur_lr)

    def _CorrectMomentum(self, correction):
        """Scale every trainable parameter's '<param>_momentum' blob by
        correction, on each GPU.

        The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it should ideally be), when the learning rate is
        changed we should scale the update history V in order to make it
        compatible in scale with lr * grad.
        """
        logger.info(
            'Scaling update history by {:.6f} (new lr / old lr)'.
            format(correction))
        for gpu_id in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(gpu_id):
                for param in self.TrainableParams(gpu_id=gpu_id):
                    momentum_blob = param + '_momentum'
                    # In-place scale: same blob is input and output
                    scale_op = core.CreateOperator(
                        'Scale', [momentum_blob], [momentum_blob],
                        scale=correction)
                    workspace.RunOperatorOnce(scale_op)

    def GetLossScale(self):
        """Return the factor applied to each loss: 1 / cfg.NUM_GPUS.

        Kept as a method to allow a way to configure the loss scale
        dynamically. This may be used in a distributed data parallel setting.
        """
        num_gpus = cfg.NUM_GPUS
        return 1.0 / num_gpus

    def AddLosses(self, losses):
        """Register one or more loss blobs on the model.

        losses: a single loss or a list of losses; each may be a string or a
            BlobReference. Names are unscoped before being stored.

        self.losses is replaced by a duplicate-free list that keeps names in
        the order they were first added.
        """
        if not isinstance(losses, list):
            losses = [losses]
        # Conversion to str allows losses to include BlobReferences
        losses = [c2_utils.UnscopeName(str(l)) for l in losses]

        # Deduplicate in first-seen order. Going through set() would scramble
        # the list, and with string hash randomization the order would differ
        # from one run to the next.
        seen = set()
        merged = []
        for name in self.losses + losses:
            if name not in seen:
                seen.add(name)
                merged.append(name)
        self.losses = merged

    def AddMetrics(self, metrics):
        """Register one or more metric names on the model.

        metrics: a single metric name or a list of metric names.

        self.metrics is replaced by a duplicate-free list that keeps names in
        the order they were first added.
        """
        if not isinstance(metrics, list):
            metrics = [metrics]

        # Deduplicate in first-seen order. Going through set() would scramble
        # the list, and with string hash randomization the order would differ
        # from one run to the next.
        seen = set()
        merged = []
        for name in self.metrics + metrics:
            if name not in seen:
                seen.add(name)
                merged.append(name)
        self.metrics = merged


def _get_lr_change_ratio(cur_lr, new_lr):
    """Return the larger of new_lr / cur_lr and cur_lr / new_lr, with each
    denominator clamped from below at 1e-10 to avoid dividing by zero.
    """
    eps = 1e-10
    increase = new_lr / np.max((cur_lr, eps))
    decrease = cur_lr / np.max((new_lr, eps))
    return np.max((increase, decrease))


================================================
FILE: detectron/modeling/fast_rcnn_heads.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Various network "heads" for classification and bounding box prediction.

The design is as follows:

... -> RoI ----\                               /-> box cls output -> cls loss
                -> RoIFeatureXform -> box head
... -> Feature /                               \-> box reg output -> reg loss
       Map

The Fast R-CNN head produces a feature representation of the RoI for the purpose
of bounding box classification and regression. The box output module converts
the feature representation into classification and regression predictions.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg
from detectron.utils.c2 import const_fill
from detectron.utils.c2 import gauss_fill
from detectron.utils.net import get_group_gn
import detectron.utils.blob as blob_utils


# ---------------------------------------------------------------------------- #
# Fast R-CNN outputs and losses
# ---------------------------------------------------------------------------- #

def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops.

    Two FC layers are attached to blob_in (whose feature size is dim):
    'cls_score' with model.num_classes outputs and 'bbox_pred' with 4
    outputs per regression class. At test time a 'cls_prob' softmax is added
    on top of 'cls_score'.
    """
    # Box classification layer
    model.FC(
        blob_in, 'cls_score', dim, model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is combined
        # with the label cross entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression layer: class-agnostic regression uses 2 classes,
    # otherwise one set of 4 outputs per class
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    model.FC(
        blob_in, 'bbox_pred', dim, num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0)
    )


def add_fast_rcnn_losses(model):
    """Add losses for RoI classification and bounding box regression.

    Adds a SoftmaxWithLoss op ('cls_prob', 'loss_cls'), a SmoothL1Loss op
    ('loss_bbox') and an Accuracy op ('accuracy_cls'), registers the losses
    and the metric on the model, and returns the loss gradients obtained
    from blob_utils.get_loss_gradients.
    """
    # The 'cls_prob' output is referenced by name below, so its blob
    # reference is not kept
    _, loss_cls = model.net.SoftmaxWithLoss(
        ['cls_score', 'labels_int32'],
        ['cls_prob', 'loss_cls'],
        scale=model.GetLossScale()
    )
    bbox_loss_inputs = [
        'bbox_pred',
        'bbox_targets',
        'bbox_inside_weights',
        'bbox_outside_weights',
    ]
    loss_bbox = model.net.SmoothL1Loss(
        bbox_loss_inputs, 'loss_bbox', scale=model.GetLossScale()
    )
    loss_gradients = blob_utils.get_loss_gradients(model, [loss_cls, loss_bbox])

    model.Accuracy(['cls_prob', 'labels_int32'], 'accuracy_cls')
    model.AddLosses(['loss_cls', 'loss_bbox'])
    model.AddMetrics('accuracy_cls')
    return loss_gradients


# ---------------------------------------------------------------------------- #
# Box heads
# ---------------------------------------------------------------------------- #

def add_roi_2mlp_head(model, blob_in, dim_in, spatial_scale):
    """Add a ReLU MLP with two hidden layers.

    RoI features ('roi_feat') are extracted from blob_in and fed through
    'fc6' and 'fc7', each followed by an in-place ReLU. Returns the name of
    the output blob ('fc7') and its dimension.
    """
    mlp_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    roi_feat = model.RoIFeatureTransform(
        blob_in,
        'roi_feat',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=resolution,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    # fc6 consumes the flattened dim_in x resolution x resolution features
    flattened_dim = dim_in * resolution * resolution
    model.FC(roi_feat, 'fc6', flattened_dim, mlp_dim)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', mlp_dim, mlp_dim)
    model.Relu('fc7', 'fc7')
    return 'fc7', mlp_dim


def add_roi_Xconv1fc_head(model, blob_in, dim_in, spatial_scale):
    """Add a X conv + 1fc head, as a reference if not using GroupNorm

    RoI features ('roi_feat') go through cfg.FAST_RCNN.NUM_STACKED_CONVS
    3x3 conv + ReLU layers named 'head_conv1', 'head_conv2', ... and then
    through 'fc6' with an in-place ReLU. Returns the name of the output blob
    ('fc6') and its dimension.
    """
    conv_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    head = model.RoIFeatureTransform(
        blob_in,
        'roi_feat',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=resolution,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    # The first conv reads dim_in channels; every later one reads conv_dim
    channels = dim_in
    for conv_idx in range(1, cfg.FAST_RCNN.NUM_STACKED_CONVS + 1):
        head = model.Conv(
            head, 'head_conv' + str(conv_idx), channels, conv_dim, 3,
            stride=1, pad=1,
            weight_init=('MSRAFill', {}),
            bias_init=('ConstantFill', {'value': 0.}),
            no_bias=0)
        head = model.Relu(head, head)
        channels = conv_dim

    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(head, 'fc6', channels * resolution * resolution, fc_dim)
    model.Relu('fc6', 'fc6')
    return 'fc6', fc_dim


def add_roi_Xconv1fc_gn_head(model, blob_in, dim_in, spatial_scale):
    """Add a X conv + 1fc head, with GroupNorm

    RoI features ('roi_feat') go through cfg.FAST_RCNN.NUM_STACKED_CONVS
    3x3 ConvGN + ReLU layers named 'head_conv1', 'head_conv2', ... and then
    through 'fc6' with an in-place ReLU. Returns the name of the output blob
    ('fc6') and its dimension.
    """
    conv_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    head = model.RoIFeatureTransform(
        blob_in,
        'roi_feat',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=resolution,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    # The first conv reads dim_in channels; every later one reads conv_dim
    channels = dim_in
    for conv_idx in range(1, cfg.FAST_RCNN.NUM_STACKED_CONVS + 1):
        head = model.ConvGN(
            head, 'head_conv' + str(conv_idx), channels, conv_dim, 3,
            group_gn=get_group_gn(conv_dim),
            stride=1, pad=1,
            weight_init=('MSRAFill', {}),
            bias_init=('ConstantFill', {'value': 0.}))
        head = model.Relu(head, head)
        channels = conv_dim

    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(head, 'fc6', channels * resolution * resolution, fc_dim)
    model.Relu('fc6', 'fc6')
    return 'fc6', fc_dim


================================================
FILE: detectron/modeling/generate_anchors.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import numpy as np

# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
#    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
#    >> anchors
#
#    anchors =
#
#       -83   -39   100    56
#      -175   -87   192   104
#      -359  -183   376   200
#       -55   -55    72    72
#      -119  -119   136   136
#      -247  -247   264   264
#       -35   -79    52    96
#       -79  -167    96   184
#      -167  -343   184   360

# array([[ -83.,  -39.,  100.,   56.],
#        [-175.,  -87.,  192.,  104.],
#        [-359., -183.,  376.,  200.],
#        [ -55.,  -55.,   72.,   72.],
#        [-119., -119.,  136.,  136.],
#        [-247., -247.,  264.,  264.],
#        [ -35.,  -79.,   52.,   96.],
#        [ -79., -167.,   96.,  184.],
#        [-167., -343.,  184.,  360.]])


def generate_anchors(
    stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.

    The sizes are converted to scales relative to stride, and stride itself
    is used as the base window size.
    """
    scales = np.array(sizes, dtype=float) / stride
    ratios = np.array(aspect_ratios, dtype=float)
    return _generate_anchors(stride, scales, ratios)


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.

    The result stacks, for each aspect ratio in turn, one row per scale.
    """
    base_window = np.array([1, 1, base_size, base_size], dtype=float) - 1
    ratio_anchors = _ratio_enum(base_window, aspect_ratios)
    per_ratio = [_scale_enum(row, scales) for row in ratio_anchors]
    return np.vstack(per_ratio)


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window).

    The anchor is indexed as (x1, y1, x2, y2) with inclusive corners, so the
    width is x2 - x1 + 1 and the height is y2 - y1 + 1.
    """
    x_min, y_min = anchor[0], anchor[1]
    w = anchor[2] - x_min + 1
    h = anchor[3] - y_min + 1
    return w, h, x_min + 0.5 * (w - 1), y_min + 0.5 * (h - 1)


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).

    Each output row is (x1, y1, x2, y2) for the matching entries of ws and
    hs.
    """
    # Half extents as column vectors, so hstack yields one row per anchor
    half_w = 0.5 * (ws[:, np.newaxis] - 1)
    half_h = 0.5 * (hs[:, np.newaxis] - 1)
    corners = (
        x_ctr - half_w,
        y_ctr - half_h,
        x_ctr + half_w,
        y_ctr + half_h,
    )
    return np.hstack(corners)


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor.

    For every ratio the width is round(sqrt(area / ratio)) and the height is
    round(width * ratio), where area is the input anchor's w * h; the center
    of the input anchor is kept.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = np.round(np.sqrt(w * h / ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)


def _scale_enum(anchor, scales):
    """Enumerate one anchor per scale, all sharing anchor's center.

    Width and height of the input anchor are both multiplied by each scale.
    """
    width, height, cx, cy = _whctrs(anchor)
    return _mkanchors(width * scales, height * scales, cx, cy)


================================================
FILE: detectron/modeling/keypoint_rcnn_heads.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Various network "heads" for predicting keypoints in Mask R-CNN.

The design is as follows:

... -> RoI ----\
                -> RoIFeatureXform -> keypoint head -> keypoint output -> loss
... -> Feature /
       Map

The keypoint head produces a feature representation of the RoI for the purpose
of keypoint prediction. The keypoint output module converts the feature
representation into keypoint heatmaps.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg
from detectron.utils.c2 import const_fill
from detectron.utils.c2 import gauss_fill
import detectron.modeling.ResNet as ResNet
import detectron.utils.blob as blob_utils


# ---------------------------------------------------------------------------- #
# Keypoint R-CNN outputs and losses
# ---------------------------------------------------------------------------- #

def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps.

    Depending on the cfg.KRCNN options this adds, in order: an optional
    ConvTranspose + ReLU on the input features, a heatmap predictor (either
    ConvTranspose or 1x1 Conv) with cfg.KRCNN.NUM_KEYPOINTS output channels,
    and an optional bilinear upsampling step. Returns the final heatmap blob.
    """
    # Output is NxKxHxW
    needs_bilinear = cfg.KRCNN.UP_SCALE > 1
    num_keypoints = cfg.KRCNN.NUM_KEYPOINTS
    features = blob_in

    if cfg.KRCNN.USE_DECONV:
        # ConvTranspose on the feature representation; 2x upsampling
        deconv_dim = cfg.KRCNN.DECONV_DIM
        deconv_kernel = cfg.KRCNN.DECONV_KERNEL
        features = model.ConvTranspose(
            features,
            'kps_deconv',
            dim,
            deconv_dim,
            kernel=deconv_kernel,
            pad=int(deconv_kernel / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = deconv_dim

    # When a bilinear upsampling step follows, the predictor writes to an
    # intermediate blob and the upsampled result takes the 'kps_score' name.
    score_name = 'kps_score_lowres' if needs_bilinear else 'kps_score'
    score_init = (cfg.KRCNN.CONV_INIT, {'std': 0.001})

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Predict heatmaps with ConvTranspose; 2x upsampling
        predictor_kernel = cfg.KRCNN.DECONV_KERNEL
        scores = model.ConvTranspose(
            features,
            score_name,
            dim,
            num_keypoints,
            kernel=predictor_kernel,
            pad=int(predictor_kernel / 2 - 1),
            stride=2,
            weight_init=score_init,
            bias_init=const_fill(0.0)
        )
    else:
        # Predict heatmaps with a 1x1 Conv; no upsampling
        scores = model.Conv(
            features,
            score_name,
            dim,
            num_keypoints,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=score_init,
            bias_init=const_fill(0.0)
        )

    if not needs_bilinear:
        return scores

    # Increase heatmap output size via bilinear upsampling
    return model.BilinearInterpolation(
        scores, 'kps_score', num_keypoints, num_keypoints, cfg.KRCNN.UP_SCALE
    )


def add_keypoint_losses(model):
    """Add Mask R-CNN keypoint specific losses.

    Adds a softmax loss over the spatial locations of each keypoint heatmap,
    optionally rescales it by 'keypoint_loss_normalizer', registers the loss
    with the model and returns the loss gradients.
    """
    # Flatten each heatmap: (N, K, H, W) -> (NK, HW)
    heatmap_area = cfg.KRCNN.HEATMAP_SIZE * cfg.KRCNN.HEATMAP_SIZE
    model.net.Reshape(
        ['kps_score'],
        ['kps_score_reshaped', '_kps_score_old_shape'],
        shape=(-1, heatmap_area)
    )
    # Softmax across *space*: every spatial location of a heatmap is treated
    # as a "class". Note that this is not what is commonly called "spatial
    # softmax" (softmax along the channel dimension at each location).
    _kps_prob, loss_kps = model.net.SoftmaxWithLoss(
        ['kps_score_reshaped', 'keypoint_locations_int32', 'keypoint_weights'],
        ['kps_prob', 'loss_kps'],
        scale=cfg.KRCNN.LOSS_WEIGHT / cfg.NUM_GPUS,
        spatial=0
    )
    if not cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
        # Discussion: the softmax loss above averages by the sum of
        # keypoint_weights, i.e. the total number of visible keypoints. That
        # count can vary significantly between minibatches, which up-weights
        # minibatches with few visible keypoints (in the extreme, a single
        # visible keypoint determines the gradient direction on its own,
        # whereas with N visible keypoints each contributes only 1/N).
        # Multiplying by the normalizer instead normalizes by the total
        # number of keypoints as if all of them were visible in a full
        # minibatch, so one visible keypoint counts as much as each of N.
        model.StopGradient(
            'keypoint_loss_normalizer', 'keypoint_loss_normalizer'
        )
        loss_kps = model.net.Mul(
            ['loss_kps', 'keypoint_loss_normalizer'], 'loss_kps_normalized'
        )
    grads = blob_utils.get_loss_gradients(model, [loss_kps])
    model.AddLosses(loss_kps)
    return grads


# ---------------------------------------------------------------------------- #
# Keypoint heads
# ---------------------------------------------------------------------------- #

def add_ResNet_roi_conv5_head_for_keypoints(
    model, blob_in, dim_in, spatial_scale
):
    """Add a ResNet "conv5" / "stage5" head for Mask R-CNN keypoint prediction.

    Returns the output blob of the added stage and its dimension (2048).
    """
    pooled_name = '_[pose]_pool5'
    model.RoIFeatureTransform(
        blob_in,
        pooled_name,
        blob_rois='keypoint_rois',
        method=cfg.KRCNN.ROI_XFORM_METHOD,
        resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    # Using the prefix '_[pose]_' to 'res5' enables initializing the head's
    # parameters using pretrained 'res5' parameters if given (see
    # utils.net.initialize_from_weights_file)
    first_stride = int(cfg.KRCNN.ROI_XFORM_RESOLUTION / 7)
    stage_out, _ = ResNet.add_stage(
        model,
        '_[pose]_res5',
        pooled_name,
        3,
        dim_in,
        2048,
        512,
        cfg.KRCNN.DILATION,
        stride_init=first_stride
    )
    return stage_out, 2048


def add_roi_pose_head_v1convX(model, blob_in, dim_in, spatial_scale):
    """Add a Mask R-CNN keypoint head. v1convX design: X * (conv).

    RoI features are extracted for 'keypoint_rois' and passed through
    cfg.KRCNN.NUM_STACKED_CONVS Conv + ReLU layers. Returns the last blob and
    the hidden dimension.
    """
    hidden_dim = cfg.KRCNN.CONV_HEAD_DIM
    kernel_size = cfg.KRCNN.CONV_HEAD_KERNEL
    same_pad = kernel_size // 2

    features = model.RoIFeatureTransform(
        blob_in,
        '_[pose]_roi_feat',
        blob_rois='keypoint_rois',
        method=cfg.KRCNN.ROI_XFORM_METHOD,
        resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    # Only the first conv sees dim_in channels; the rest see hidden_dim.
    in_channels = dim_in
    for layer_idx in range(1, cfg.KRCNN.NUM_STACKED_CONVS + 1):
        features = model.Conv(
            features,
            'conv_fcn' + str(layer_idx),
            in_channels,
            hidden_dim,
            kernel_size,
            stride=1,
            pad=same_pad,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.01}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        features = model.Relu(features, features)
        in_channels = hidden_dim

    return features, hidden_dim


================================================
FILE: detectron/modeling/mask_rcnn_heads.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Various network "heads" for predicting masks in Mask R-CNN.

The design is as follows:

... -> RoI ----\
                -> RoIFeatureXform -> mask head -> mask output -> loss
... -> Feature /
       Map

The mask head produces a feature representation of the RoI for the purpose
of mask prediction. The mask output module converts the feature representation
into real-valued (soft) masks.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg
from detectron.utils.c2 import const_fill
from detectron.utils.c2 import gauss_fill
from detectron.utils.net import get_group_gn
import detectron.modeling.ResNet as ResNet
import detectron.utils.blob as blob_utils


# ---------------------------------------------------------------------------- #
# Mask R-CNN outputs and losses
# ---------------------------------------------------------------------------- #

def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs.

    Logits are predicted either with a fully connected layer or with a 1x1
    Conv (optionally followed by bilinear upsampling). When the model is not
    in training mode a Sigmoid is appended and the probabilities are returned
    instead of the logits.
    """
    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        num_cls = cfg.MODEL.NUM_CLASSES
    else:
        num_cls = 1

    if cfg.MRCNN.USE_FC_OUTPUT:
        # Predict masks with a fully connected layer (ignore 'fcn' in the blob
        # name)
        lowres = cfg.MRCNN.RESOLUTION / cfg.MRCNN.UPSAMPLE_RATIO
        dim_fc = int(dim * lowres**2)
        logits = model.FC(
            blob_in,
            'mask_fcn_logits',
            dim_fc,
            num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict mask using Conv

        # Use GaussianFill for class-agnostic mask prediction; fills based on
        # fan-in can be too large in this case and cause divergence
        if cfg.MRCNN.CLS_SPECIFIC_MASK:
            fill = cfg.MRCNN.CONV_INIT
        else:
            fill = 'GaussianFill'
        logits = model.Conv(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            logits = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO
            )

    if model.train:
        return logits
    # Test time: return probabilities rather than logits
    return model.net.Sigmoid(logits, 'mask_fcn_probs')


def add_mask_rcnn_losses(model, blob_mask):
    """Add Mask R-CNN specific losses.

    Adds a sigmoid cross entropy loss between blob_mask and 'masks_int32',
    registers it with the model and returns the loss gradients.
    """
    mask_loss_scale = model.GetLossScale() * cfg.MRCNN.WEIGHT_LOSS_MASK
    loss_mask = model.net.SigmoidCrossEntropyLoss(
        [blob_mask, 'masks_int32'], 'loss_mask', scale=mask_loss_scale
    )
    grads = blob_utils.get_loss_gradients(model, [loss_mask])
    model.AddLosses('loss_mask')
    return grads


# ---------------------------------------------------------------------------- #
# Mask heads
# ---------------------------------------------------------------------------- #

def mask_rcnn_fcn_head_v1up4convs(model, blob_in, dim_in, spatial_scale):
    """v1up design: 4 * (conv 3x3), convT 2x2.

    Thin wrapper around mask_rcnn_fcn_head_v1upXconvs with four convs.
    """
    head = mask_rcnn_fcn_head_v1upXconvs(
        model, blob_in, dim_in, spatial_scale, num_convs=4
    )
    return head


def mask_rcnn_fcn_head_v1up4convs_gn(model, blob_in, dim_in, spatial_scale):
    """v1up design: 4 * (conv 3x3), convT 2x2, with GroupNorm.

    Thin wrapper around mask_rcnn_fcn_head_v1upXconvs_gn with four convs.
    """
    head = mask_rcnn_fcn_head_v1upXconvs_gn(
        model, blob_in, dim_in, spatial_scale, num_convs=4
    )
    return head


def mask_rcnn_fcn_head_v1up(model, blob_in, dim_in, spatial_scale):
    """v1up design: 2 * (conv 3x3), convT 2x2.

    Thin wrapper around mask_rcnn_fcn_head_v1upXconvs with two convs.
    """
    head = mask_rcnn_fcn_head_v1upXconvs(
        model, blob_in, dim_in, spatial_scale, num_convs=2
    )
    return head


def mask_rcnn_fcn_head_v1upXconvs(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2.

    RoI features for 'mask_rois' go through num_convs 3x3 Conv + ReLU layers
    and a stride-2 ConvTranspose + ReLU. Returns the final blob and
    cfg.MRCNN.DIM_REDUCED as its dimension.
    """
    features = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    conv_dilation = cfg.MRCNN.DILATION
    hidden_dim = cfg.MRCNN.DIM_REDUCED

    # Only the first conv sees dim_in channels; the rest see hidden_dim.
    in_channels = dim_in
    for layer_idx in range(1, num_convs + 1):
        features = model.Conv(
            features,
            '_[mask]_fcn' + str(layer_idx),
            in_channels,
            hidden_dim,
            kernel=3,
            dilation=conv_dilation,
            pad=1 * conv_dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        features = model.Relu(features, features)
        in_channels = hidden_dim

    # upsample layer
    model.ConvTranspose(
        features,
        'conv5_mask',
        hidden_dim,
        hidden_dim,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    upsampled = model.Relu('conv5_mask', 'conv5_mask')

    return upsampled, hidden_dim


def mask_rcnn_fcn_head_v1upXconvs_gn(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm.

    RoI features for 'mask_rois' go through num_convs 3x3 ConvGN + ReLU layers
    and a stride-2 ConvTranspose + ReLU. Returns the final blob and
    cfg.MRCNN.DIM_REDUCED as its dimension.
    """
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    for i in range(num_convs):
        current = model.ConvGN(
            current,
            '_mask_fcn' + str(i + 1),
            dim_in,
            dim_inner,
            group_gn=get_group_gn(dim_inner),
            kernel=3,
            # The padding below is scaled by the dilation, so the dilation
            # itself must be forwarded too (as the non-GN head does);
            # otherwise a 3x3 conv with pad > 1 enlarges the feature map.
            dilation=dilation,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        # Only the first conv sees dim_in channels
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        current,
        'conv5_mask',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner


def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
    """Use a ResNet "conv5" / "stage5" head for mask prediction. Weights and
    computation are shared with the conv5 box head. Computation can only be
    shared during training, since inference is cascaded.

    v0upshare design: conv5, convT 2x2.
    """
    # Since box and mask head are shared, these must match
    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    if not model.train:
        # Test time: the conv5 features have to be re-computed
        conv5_blob, conv5_dim = add_ResNet_roi_conv5_head_for_masks(
            model, blob_in, dim_in, spatial_scale
        )
    else:
        # Training time: share computation with the bbox head by sampling
        # from its 'res5_2_sum' output
        conv5_dim = 2048
        conv5_blob = model.net.SampleAs(
            ['res5_2_sum', 'roi_has_mask_int32'],
            ['_[mask]_res5_2_sum_sliced']
        )

    out_dim = cfg.MRCNN.DIM_REDUCED

    upsampled = model.ConvTranspose(
        conv5_blob,
        'conv5_mask',
        conv5_dim,
        out_dim,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),  # std only for gauss
        bias_init=const_fill(0.0)
    )
    # In-place ReLU on the ConvTranspose output
    model.Relu('conv5_mask', 'conv5_mask')

    return upsampled, out_dim


def mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale):
    """v0up design: conv5, deconv 2x2 (no weight sharing with the box head).

    Returns the ReLU'd 'conv5_mask' blob and cfg.MRCNN.DIM_REDUCED.
    """
    conv5_blob, conv5_dim = add_ResNet_roi_conv5_head_for_masks(
        model, blob_in, dim_in, spatial_scale
    )
    out_dim = cfg.MRCNN.DIM_REDUCED

    # 2x upsampling of the conv5 features
    model.ConvTranspose(
        conv5_blob,
        'conv5_mask',
        conv5_dim,
        out_dim,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=('GaussianFill', {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    upsampled = model.Relu('conv5_mask', 'conv5_mask')

    return upsampled, out_dim


def add_ResNet_roi_conv5_head_for_masks(model, blob_in, dim_in, spatial_scale):
    """Add a ResNet "conv5" / "stage5" head for predicting masks.

    Returns the output blob of the added stage and its dimension (2048).
    """
    pooled_name = '_[mask]_pool5'
    model.RoIFeatureTransform(
        blob_in,
        blob_out=pooled_name,
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    conv_dilation = cfg.MRCNN.DILATION
    first_stride = int(cfg.MRCNN.ROI_XFORM_RESOLUTION / 7)  # by default: 2

    stage_out, _ = ResNet.add_stage(
        model,
        '_[mask]_res5',
        pooled_name,
        3,
        dim_in,
        2048,
        512,
        conv_dilation,
        stride_init=first_stride
    )

    return stage_out, 2048


================================================
FILE: detectron/modeling/model_builder.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Detectron model construction functions.

Detectron supports a large number of model types. The configuration space is
large. To get a sense, a given model is an element in the cartesian product of:

  - backbone (e.g., VGG16, ResNet, ResNeXt)
  - FPN (on or off)
  - RPN only (just proposals)
  - Fixed proposals for Fast R-CNN, RFCN, Mask R-CNN (with or without keypoints)
  - End-to-end model with RPN + Fast R-CNN (i.e., Faster R-CNN), Mask R-CNN, ...
  - Different "head" choices for the model
  - ... many configuration options ...

A given model is made by combining many basic components. The result is flexible
though somewhat complex to understand at first.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import importlib
import logging

from caffe2.python import core
from caffe2.python import workspace

from detectron.core.config import cfg
from detectron.modeling.detector import DetectionModelHelper
from detectron.roi_data.loader import RoIDataLoader
import detectron.modeling.fast_rcnn_heads as fast_rcnn_heads
import detectron.modeling.keypoint_rcnn_heads as keypoint_rcnn_heads
import detectron.modeling.mask_rcnn_heads as mask_rcnn_heads
import detectron.modeling.name_compat as name_compat
import detectron.modeling.optimizer as optim
import detectron.modeling.retinanet_heads as retinanet_heads
import detectron.modeling.rfcn_heads as rfcn_heads
import detectron.modeling.rpn_heads as rpn_heads
import detectron.roi_data.minibatch as roi_data_minibatch
import detectron.utils.c2 as c2_utils

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------- #
# Generic recomposable model builders
#
# For example, you can create a Fast R-CNN model with the ResNet-50-C4 backbone
# with the configuration:
#
# MODEL:
#   TYPE: generalized_rcnn
#   CONV_BODY: ResNet.add_ResNet50_conv4_body
#   ROI_HEAD: ResNet.add_ResNet_roi_conv5_head
# ---------------------------------------------------------------------------- #

def generalized_rcnn(model):
    """This model type handles:
      - Fast R-CNN
      - RPN only (not integrated with Fast R-CNN)
      - Faster R-CNN (stagewise training from NIPS paper)
      - Faster R-CNN (end-to-end joint training)
      - Mask R-CNN (stagewise training from NIPS paper)
      - Mask R-CNN (end-to-end joint training)

    The conv body and the box / mask / keypoint head builders are looked up
    by name from the config and handed to build_generic_detection_model.
    """
    conv_body_func = get_func(cfg.MODEL.CONV_BODY)
    box_head_func = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)
    mask_head_func = get_func(cfg.MRCNN.ROI_MASK_HEAD)
    keypoint_head_func = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)
    return build_generic_detection_model(
        model,
        conv_body_func,
        add_roi_box_head_func=box_head_func,
        add_roi_mask_head_func=mask_head_func,
        add_roi_keypoint_head_func=keypoint_head_func,
        freeze_conv_body=cfg.TRAIN.FREEZE_CONV_BODY
    )


def rfcn(model):
    """Build an R-FCN model on the conv body named by cfg.MODEL.CONV_BODY."""
    # TODO(rbg): fold into build_generic_detection_model
    conv_body_func = get_func(cfg.MODEL.CONV_BODY)
    return build_generic_rfcn_model(model, conv_body_func)


def retinanet(model):
    """Build a RetinaNet model on the conv body named by cfg.MODEL.CONV_BODY."""
    # TODO(rbg): fold into build_generic_detection_model
    conv_body_func = get_func(cfg.MODEL.CONV_BODY)
    return build_generic_retinanet_model(model, conv_body_func)


# ---------------------------------------------------------------------------- #
# Helper functions for building various re-usable network bits
# ---------------------------------------------------------------------------- #

def create(model_type_func, train=False, gpu_id=0):
    """Generic model creation function that dispatches to specific model
    building functions.

    By default, this function will generate a data parallel model configured to
    run on cfg.NUM_GPUS devices. However, you can restrict it to build a model
    targeted to a specific GPU by specifying gpu_id. This is used by
    optimizer.build_data_parallel_model() during test time.

    model_type_func is the name of the builder function (resolved through
    get_func); it is also used as the name of the model helper.
    """
    helper = DetectionModelHelper(
        name=model_type_func,
        train=train,
        num_classes=cfg.MODEL.NUM_CLASSES,
        init_params=train
    )
    helper.only_build_forward_pass = False
    helper.target_gpu_id = gpu_id
    build_func = get_func(model_type_func)
    return build_func(helper)


def get_func(func_name):
    """Helper to return a function object by name. func_name must identify a
    function in this module or the path to a function relative to the base
    'modeling' module.

    Returns None for the empty string. Old names are first remapped through
    name_compat.get_new_name (a warning is logged when a remap happens). Any
    failure to resolve the name is logged and the original exception is
    re-raised.
    """
    if func_name == '':
        return None
    new_func_name = name_compat.get_new_name(func_name)
    if new_func_name != func_name:
        # Logger.warn is a deprecated alias; use Logger.warning
        logger.warning(
            'Remapping old function name: {} -> {}'.
            format(func_name, new_func_name)
        )
        func_name = new_func_name
    try:
        parts = func_name.split('.')
        # Refers to a function in this module
        if len(parts) == 1:
            return globals()[parts[0]]
        # Otherwise, assume we're referencing a module under modeling
        module_name = 'detectron.modeling.' + '.'.join(parts[:-1])
        module = importlib.import_module(module_name)
        return getattr(module, parts[-1])
    except Exception:
        # Log which name failed, then let the caller see the real error
        logger.error('Failed to find function: {}'.format(func_name))
        raise


def build_generic_detection_model(
    model,
    add_conv_body_func,
    add_roi_box_head_func=None,
    add_roi_mask_head_func=None,
    add_roi_keypoint_head_func=None,
    freeze_conv_body=False
):
    """Assemble a detection model from a conv body and optional heads.

    The conv body is always added. RPN, Fast R-CNN, mask and keypoint heads
    are added according to cfg.RPN.RPN_ON, cfg.MODEL.RPN_ONLY,
    cfg.MODEL.MASK_ON and cfg.MODEL.KEYPOINTS_ON. When freeze_conv_body is
    set, gradients are stopped at the conv body outputs. The per-GPU build
    function is handed to optim.build_data_parallel_model and the model is
    returned.
    """
    def _single_gpu_build_func(model):
        """Build the model on a single GPU. Can be called in a loop over GPUs
        with name and device scoping to create a data parallel model.

        Returns the merged loss gradients of all heads when training, and
        None otherwise.
        """
        # Add the conv body (called "backbone architecture" in papers)
        # E.g., ResNet-50, ResNet-50-FPN, ResNeXt-101-FPN, etc.
        blob_conv, dim_conv, spatial_scale_conv = add_conv_body_func(model)
        if freeze_conv_body:
            for b in c2_utils.BlobReferenceList(blob_conv):
                model.StopGradient(b, b)

        if not model.train:  # == inference
            # Create a net that can be used to execute the conv body on an image
            # (without also executing RPN or any other network heads)
            model.conv_body_net = model.net.Clone('conv_body_net')

        # One slot per head; a slot stays None when the head is not added or
        # produces no loss gradients
        head_loss_gradients = {
            'rpn': None,
            'box': None,
            'mask': None,
            'keypoints': None,
        }

        if cfg.RPN.RPN_ON:
            # Add the RPN head
            head_loss_gradients['rpn'] = rpn_heads.add_generic_rpn_outputs(
                model, blob_conv, dim_conv, spatial_scale_conv
            )

        if cfg.FPN.FPN_ON:
            # After adding the RPN head, restrict FPN blobs and scales to
            # those used in the RoI heads
            blob_conv, spatial_scale_conv = _narrow_to_fpn_roi_levels(
                blob_conv, spatial_scale_conv
            )

        if not cfg.MODEL.RPN_ONLY:
            # Add the Fast R-CNN head
            head_loss_gradients['box'] = _add_fast_rcnn_head(
                model, add_roi_box_head_func, blob_conv, dim_conv,
                spatial_scale_conv
            )

        if cfg.MODEL.MASK_ON:
            # Add the mask head
            head_loss_gradients['mask'] = _add_roi_mask_head(
                model, add_roi_mask_head_func, blob_conv, dim_conv,
                spatial_scale_conv
            )

        if cfg.MODEL.KEYPOINTS_ON:
            # Add the keypoint head (stored under the same 'keypoints' key
            # the dict was initialized with)
            head_loss_gradients['keypoints'] = _add_roi_keypoint_head(
                model, add_roi_keypoint_head_func, blob_conv, dim_conv,
                spatial_scale_conv
            )

        if model.train:
            # Merge the gradients of every head that produced some
            loss_gradients = {}
            for lg in head_loss_gradients.values():
                if lg is not None:
                    loss_gradients.update(lg)
            return loss_gradients
        else:
            return None

    optim.build_data_parallel_model(model, _single_gpu_build_func)
    return model


def _narrow_to_fpn_roi_levels(blobs, spatial_scales):
    """Return only the blobs and spatial scales that will be used for RoI heads.
    Inputs `blobs` and `spatial_scales` may include extra blobs and scales that
    are used for RPN proposals, but not for RoI heads.
    """
    roi_min = cfg.FPN.ROI_MIN_LEVEL
    roi_max = cfg.FPN.ROI_MAX_LEVEL
    # Code only supports the case when RPN and RoI min levels are the same
    assert cfg.FPN.RPN_MIN_LEVEL == roi_min
    # RPN max level can be >= RoI max level
    assert cfg.FPN.RPN_MAX_LEVEL >= roi_max
    # When the RPN max level is > the RoI max level, the leading conv blobs
    # have to be discarded (blobs are ordered from max/coarsest level to
    # min/finest level), so keep only the trailing entries
    keep = roi_max - roi_min + 1
    return blobs[-keep:], spatial_scales[-keep:]


def _add_fast_rcnn_head(
    model, add_roi_box_head_func, blob_in, dim_in, spatial_scale_in
):
    """Add a Fast R-CNN head to the model.

    Adds the box head and the Fast R-CNN outputs. Returns the loss gradients
    when training and None otherwise.
    """
    head_blob, head_dim = add_roi_box_head_func(
        model, blob_in, dim_in, spatial_scale_in
    )
    fast_rcnn_heads.add_fast_rcnn_outputs(model, head_blob, head_dim)
    if not model.train:
        return None
    return fast_rcnn_heads.add_fast_rcnn_losses(model)


def _add_roi_mask_head(
    model, add_roi_mask_head_func, blob_in, dim_in, spatial_scale_in
):
    """Add a mask prediction head to the model.

    At training time the mask losses are added and their gradients returned.
    At inference time the ops added for the mask head are split off into
    model.mask_net, model.net is restored to its state before the head was
    added, and None is returned.
    """
    # Capture model graph before adding the mask head
    # (a deep copy, so ops added below do not show up in the snapshot)
    bbox_net = copy.deepcopy(model.net.Proto())
    # Add the mask head
    blob_mask_head, dim_mask_head = add_roi_mask_head_func(
        model, blob_in, dim_in, spatial_scale_in
    )
    # Add the mask output
    blob_mask = mask_rcnn_heads.add_mask_rcnn_outputs(
        model, blob_mask_head, dim_mask_head
    )

    if not model.train:  # == inference
        # Inference uses a cascade of box predictions, then mask predictions.
        # This requires separate nets for box and mask prediction.
        # So we extract the mask prediction net, store it as its own network,
        # then restore model.net to be the bbox-only network
        # len(bbox_net.op) is the number of ops that existed before the mask
        # head was added
        model.mask_net, blob_mask = c2_utils.SuffixNet(
            'mask_net', model.net, len(bbox_net.op), blob_mask
        )
        model.net._net = bbox_net
        loss_gradients = None
    else:
        loss_gradients = mask_rcnn_heads.add_mask_rcnn_losses(model, blob_mask)
    return loss_gradients


def _add_roi_keypoint_head(
    model, add_roi_keypoint_head_func, blob_in, dim_in, spatial_scale_in
):
    """Add a keypoint prediction head to the model.

    At training time the keypoint losses are added and their gradients
    returned. At inference time the ops added for the keypoint head are split
    off into model.keypoint_net, model.net is restored to its state before
    the head was added, and None is returned.
    """
    # Capture model graph before adding the keypoint head
    # (a deep copy, so ops added below do not show up in the snapshot)
    bbox_net = copy.deepcopy(model.net.Proto())
    # Add the keypoint head
    blob_keypoint_head, dim_keypoint_head = add_roi_keypoint_head_func(
        model, blob_in, dim_in, spatial_scale_in
    )
    # Add the keypoint output
    blob_keypoint = keypoint_rcnn_heads.add_keypoint_outputs(
        model, blob_keypoint_head, dim_keypoint_head
    )

    if not model.train:  # == inference
        # Inference uses a cascade of box predictions, then keypoint predictions
        # This requires separate nets for box and keypoint prediction.
        # So we extract the keypoint prediction net, store it as its own
        # network, then restore model.net to be the bbox-only network
        # len(bbox_net.op) is the number of ops that existed before the
        # keypoint head was added; keypoint_blob_out is not used afterwards
        model.keypoint_net, keypoint_blob_out = c2_utils.SuffixNet(
            'keypoint_net', model.net, len(bbox_net.op), blob_keypoint
        )
        model.net._net = bbox_net
        loss_gradients = None
    else:
        loss_gradients = keypoint_rcnn_heads.add_keypoint_losses(model)
    return loss_gradients


def build_generic_rfcn_model(model, add_conv_body_func, dim_reduce=None):
    """Assemble an R-FCN model: conv body, R-FCN outputs and, when training,
    the Fast R-CNN losses. dim_reduce is forwarded to
    rfcn_heads.add_rfcn_outputs.
    """
    # TODO(rbg): fold this function into build_generic_detection_model
    def _single_gpu_build_func(model):
        """Builds the model on a single GPU. Can be called in a loop over GPUs
        with name and device scoping to create a data parallel model."""
        body_blob, body_dim, body_scale = add_conv_body_func(model)
        if not model.train:
            # Keep a net that runs only the conv body
            model.conv_body_net = model.net.Clone('conv_body_net')
        rfcn_heads.add_rfcn_outputs(
            model, body_blob, body_dim, dim_reduce, body_scale
        )
        if not model.train:
            return None
        return fast_rcnn_heads.add_fast_rcnn_losses(model)

    optim.build_data_parallel_model(model, _single_gpu_build_func)
    return model


def build_generic_retinanet_model(
    model, add_conv_body_func, freeze_conv_body=False
):
    """Build a data parallel RetinaNet model on top of the given conv body.

    The per-GPU graph is the conv body followed by the FPN RetinaNet outputs;
    when the model is in training mode the RetinaNet losses are added as well.
    Returns the `model` that was passed in.

    NOTE(review): `freeze_conv_body` is accepted but never read in this
    function.
    """
    # TODO(rbg): fold this function into build_generic_detection_model
    def _single_gpu_build_func(model):
        """Builds the model on a single GPU. Can be called in a loop over GPUs
        with name and device scoping to create a data parallel model."""
        body_blobs, body_dim, body_scales = add_conv_body_func(model)
        if not model.train:
            # Clone the net before the RetinaNet outputs are added to it
            model.conv_body_net = model.net.Clone('conv_body_net')
        retinanet_heads.add_fpn_retinanet_outputs(
            model, body_blobs, body_dim, body_scales
        )
        if not model.train:
            # Inference: there are no losses, hence no loss gradients
            return None
        return retinanet_heads.add_fpn_retinanet_losses(model)

    optim.build_data_parallel_model(model, _single_gpu_build_func)
    return model


# ---------------------------------------------------------------------------- #
# Network inputs
# ---------------------------------------------------------------------------- #

def add_training_inputs(model, roidb=None):
    """Attach the training input ops and blobs to an already-built model.

    To be called *after* model_builder.create(). When `roidb` is given, a
    RoIDataLoader is created for it and stored as `model.roi_data_loader`;
    otherwise the loader already present on the model is used.
    """
    # Implementation notes:
    #   The usual order would be input ops first, then the rest of the net.
    #   Creating the input ops requires the dataset to be loaded, though, and
    #   that can take a few minutes for COCO. To let debugging fail fast, the
    #   net is built *without input ops* before the dataset is loaded and the
    #   input ops are appended here afterwards. Because they are appended
    #   late, the op list is rotated at the end so that they come first.
    assert model.train, 'Training inputs can only be added to a trainable model'
    if roidb is not None:
        # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
        model.roi_data_loader = RoIDataLoader(
            roidb,
            num_loaders=cfg.DATA_LOADER.NUM_THREADS,
            minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
            blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
        )
    num_ops_before = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for name in blob_names:
                scoped_name = core.ScopedName(name)
                workspace.CreateBlob(scoped_name)
            queue_name = model.roi_data_loader._blobs_queue_name
            model.net.DequeueBlobs(queue_name, blob_names)
    # A little op surgery: rotate the freshly appended input ops to the front
    ops = model.net._net.op
    num_input_ops = len(ops) - num_ops_before
    reordered_ops = ops[-num_input_ops:] + ops[:-num_input_ops]
    del ops[:]
    ops.extend(reordered_ops)


def add_inference_inputs(model):
    """Create, in the workspace, every missing input blob of the test nets.

    Covers `model.net` always, `model.mask_net` when cfg.MODEL.MASK_ON is set
    and `model.keypoint_net` when cfg.MODEL.KEYPOINTS_ON is set.
    """

    def create_input_blobs_for_net(net_def):
        # Walk every input of every op, in op order
        input_names = (name for op in net_def.op for name in op.input)
        for name in input_names:
            if workspace.HasBlob(name):
                continue
            workspace.CreateBlob(name)

    create_input_blobs_for_net(model.net.Proto())
    if cfg.MODEL.MASK_ON:
        create_input_blobs_for_net(model.mask_net.Proto())
    if cfg.MODEL.KEYPOINTS_ON:
        create_input_blobs_for_net(model.keypoint_net.Proto())


# ---------------------------------------------------------------------------- #
# ********************** DEPRECATED FUNCTIONALITY BELOW ********************** #
# ---------------------------------------------------------------------------- #

# ---------------------------------------------------------------------------- #
# Hardcoded functions to create various types of common models
#
#            *** This type of model definition is deprecated ***
#            *** Use the generic composable versions instead ***
#
# ---------------------------------------------------------------------------- #

import detectron.modeling.ResNet as ResNet
import detectron.modeling.VGG16 as VGG16
import detectron.modeling.VGG_CNN_M_1024 as VGG_CNN_M_1024


def fast_rcnn(model):
    """Deprecated model type: log a warning, then build via generalized_rcnn.

    Returns whatever generalized_rcnn(model) returns.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning('Deprecated: use `MODEL.TYPE: generalized_rcnn`.')
    return generalized_rcnn(model)


def mask_rcnn(model):
    """Deprecated model type: log a warning, then build via generalized_rcnn.

    Returns whatever generalized_rcnn(model) returns.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning(
        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '
        '`MODEL.MASK_ON: True`'
    )
    return generalized_rcnn(model)


def keypoint_rcnn(model):
    """Deprecated model type: log a warning, then build via generalized_rcnn.

    Returns whatever generalized_rcnn(model) returns.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning(
        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '
        '`MODEL.KEYPOINTS_ON: True`'
    )
    return generalized_rcnn(model)


def mask_and_keypoint_rcnn(model):
    """Deprecated model type: log a warning, then build via generalized_rcnn.

    Returns whatever generalized_rcnn(model) returns.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name.
    # The message previously had unbalanced backticks around the two options.
    logger.warning(
        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '
        '`MODEL.MASK_ON: True` and `MODEL.KEYPOINTS_ON: True`'
    )
    return generalized_rcnn(model)


def rpn(model):
    """Deprecated model type: log a warning, then build via generalized_rcnn.

    Returns whatever generalized_rcnn(model) returns.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning(
        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '
        '`MODEL.RPN_ONLY: True`'
    )
    return generalized_rcnn(model)


def fpn_rpn(model):
    """Deprecated model type: log a warning, then build via generalized_rcnn.

    Returns whatever generalized_rcnn(model) returns.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning(
        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '
        '`MODEL.RPN_ONLY: True` and FPN enabled via configs'
    )
    return generalized_rcnn(model)


def faster_rcnn(model):
    """Deprecated model type: log a warning, then build via generalized_rcnn.

    Returns whatever generalized_rcnn(model) returns.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning(
        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '
        '`MODEL.FASTER_RCNN: True`'
    )
    return generalized_rcnn(model)


def fast_rcnn_frozen_features(model):
    """Deprecated: build a detection model with freeze_conv_body=True.

    The conv body and the RoI box head builders are resolved with get_func
    from cfg.MODEL.CONV_BODY and cfg.FAST_RCNN.ROI_BOX_HEAD.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')
    return build_generic_detection_model(
        model,
        get_func(cfg.MODEL.CONV_BODY),
        add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD),
        freeze_conv_body=True
    )


def rpn_frozen_features(model):
    """Deprecated: build a detection model with freeze_conv_body=True.

    Only the conv body builder, resolved with get_func from
    cfg.MODEL.CONV_BODY, is passed on; no RoI head builders are given.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')
    return build_generic_detection_model(
        model, get_func(cfg.MODEL.CONV_BODY), freeze_conv_body=True
    )


def fpn_rpn_frozen_features(model):
    """Deprecated: build a detection model with freeze_conv_body=True.

    Only the conv body builder, resolved with get_func from
    cfg.MODEL.CONV_BODY, is passed on; no RoI head builders are given.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')
    return build_generic_detection_model(
        model, get_func(cfg.MODEL.CONV_BODY), freeze_conv_body=True
    )


def mask_rcnn_frozen_features(model):
    """Deprecated: build a detection model with freeze_conv_body=True.

    The conv body, RoI box head and RoI mask head builders are resolved with
    get_func from cfg.MODEL.CONV_BODY, cfg.FAST_RCNN.ROI_BOX_HEAD and
    cfg.MRCNN.ROI_MASK_HEAD.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')
    return build_generic_detection_model(
        model,
        get_func(cfg.MODEL.CONV_BODY),
        add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD),
        add_roi_mask_head_func=get_func(cfg.MRCNN.ROI_MASK_HEAD),
        freeze_conv_body=True
    )


def keypoint_rcnn_frozen_features(model):
    """Deprecated: build a detection model with freeze_conv_body=True.

    The conv body, RoI box head and RoI keypoint head builders are resolved
    with get_func from cfg.MODEL.CONV_BODY, cfg.FAST_RCNN.ROI_BOX_HEAD and
    cfg.KRCNN.ROI_KEYPOINTS_HEAD.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')
    return build_generic_detection_model(
        model,
        get_func(cfg.MODEL.CONV_BODY),
        add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD),
        add_roi_keypoint_head_func=get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD),
        freeze_conv_body=True
    )


# ---------------------------------------------------------------------------- #
# Fast R-CNN models
# ---------------------------------------------------------------------------- #


def VGG_CNN_M_1024_fast_rcnn(model):
    """Deprecated hardcoded Fast R-CNN definition for VGG_CNN_M_1024.

    Delegates to build_generic_detection_model with the VGG_CNN_M_1024 conv5
    body and RoI FC head builders.
    """
    conv_body_func = VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body
    roi_head_func = VGG_CNN_M_1024.add_VGG_CNN_M_1024_roi_fc_head
    return build_generic_detection_model(model, conv_body_func, roi_head_func)


def VGG16_fast_rcnn(model):
    """Deprecated hardcoded Fast R-CNN definition for VGG16.

    Delegates to build_generic_detection_model with the VGG16 conv5 body and
    RoI FC head builders.
    """
    conv_body_func = VGG16.add_VGG16_conv5_body
    roi_head_func = VGG16.add_VGG16_roi_fc_head
    return build_generic_detection_model(model, conv_body_func, roi_head_func)


def ResNet50_fast_rcnn(model):
    """Deprecated hardcoded Fast R-CNN definition for ResNet50.

    Delegates to build_generic_detection_model with the ResNet50 conv4 body
    and the ResNet RoI conv5 head builders.
    """
    conv_body_func = ResNet.add_ResNet50_conv4_body
    roi_head_func = ResNet.add_ResNet_roi_conv5_head
    return build_generic_detection_model(model, conv_body_func, roi_head_func)


def ResNet101_fast_rcnn(model):
    """Deprecated hardcoded Fast R-CNN definition for ResNet101.

    Delegates to build_generic_detection_model with the ResNet101 conv4 body
    and the ResNet RoI conv5 head builders.
    """
    conv_body_func = ResNet.add_ResNet101_conv4_body
    roi_head_func = ResNet.add_ResNet_roi_conv5_head
    return build_generic_detection_model(model, conv_body_func, roi_head_func)


def ResNet50_fast_rcnn_frozen_features(model):
    """Deprecated hardcoded Fast R-CNN definition for ResNet50, frozen body.

    Same builders as ResNet50_fast_rcnn, passed together with
    freeze_conv_body=True.
    """
    conv_body_func = ResNet.add_ResNet50_conv4_body
    roi_head_func = ResNet.add_ResNet_roi_conv5_head
    return build_generic_detection_model(
        model, conv_body_func, roi_head_func, freeze_conv_body=True
    )


def ResNet101_fast_rcnn_frozen_features(model):
    """Deprecated hardcoded Fast R-CNN definition for ResNet101, frozen body.

    Same builders as ResNet101_fast_rcnn, passed together with
    freeze_conv_body=True.
    """
    conv_body_func = ResNet.add_ResNet101_conv4_body
    roi_head_func = ResNet.add_ResNet_roi_conv5_head
    return build_generic_detection_model(
        model, conv_body_func, roi_head_func, freeze_conv_body=True
    )


# ---------------------------------------------------------------------------- #
# RPN-only models
# ---------------------------------------------------------------------------- #


def VGG_CNN_M_1024_rpn(model):
    """Deprecated hardcoded RPN-only definition for VGG_CNN_M_1024.

    Delegates to build_generic_detection_model with only the VGG_CNN_M_1024
    conv5 body builder.
    """
    conv_body_func = VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body
    return build_generic_detection_model(model, conv_body_func)


def VGG16_rpn(model):
    """Deprecated hardcoded RPN-only definition for VGG16.

    Delegates to build_generic_detection_model with only the VGG16 conv5 body
    builder.
    """
    conv_body_func = VGG16.add_VGG16_conv5_body
    return build_generic_detection_model(model, conv_body_func)


def ResNet50_rpn_conv4(model):
    """Deprecated hardcoded RPN-only definition for ResNet50.

    Delegates to build_generic_detection_model with only the ResNet50 conv4
    body builder.
    """
    conv_body_func = ResNet.add_ResNet50_conv4_body
    return build_generic_detection_model(model, conv_body_func)


def ResNet101_rpn_conv4(model):
    """Deprecated hardcoded RPN-only definition for ResNet101.

    Delegates to build_generic_detection_model with only the ResNet101 conv4
    body builder.
    """
    conv_body_func = ResNet.add_ResNet101_conv4_body
    return build_generic_detection_model(model, conv_body_func)


def VGG_CNN_M_1024_rpn_frozen_features(model):
    """Deprecated hardcoded RPN-only definition for VGG_CNN_M_1024, frozen body.

    Same conv body builder as VGG_CNN_M_1024_rpn, passed together with
    freeze_conv_body=True.
    """
    conv_body_func = VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body
    return build_generic_detection_model(
        model, conv_body_func, freeze_conv_body=True
    )


def VGG16_rpn_frozen_features(model):
    """Deprecated hardcoded RPN-only definition for VGG16, frozen body.

    Same conv body builder as VGG16_rpn, passed together with
    freeze_conv_body=True.
    """
    conv_body_func = VGG16.add_VGG16_conv5_body
    return build_generic_detection_model(
        model, conv_body_func, freeze_conv_body=True
    )


def ResNet50_rpn_conv4_frozen_features(model):
    """Deprecated hardcoded RPN-only definition for ResNet50, frozen body.

    Same conv body builder as ResNet50_rpn_conv4, passed together with
    freeze_conv_body=True.
    """
    conv_body_func = ResNet.add_ResNet50_conv4_body
    return build_generic_detection_model(
        model, conv_body_func, freeze_conv_body=True
    )


def ResNet101_rpn_conv4_frozen_features(model):
    """Deprecated hardcoded RPN-only definition for ResNet101, frozen body.

    Same conv body builder as ResNet101_rpn_conv4, passed together with
    freeze_conv_body=True.
    """
    conv_body_func = ResNet.add_ResNet101_conv4_body
    return build_generic_detection_model(
        model, conv_body_func, freeze_conv_body=True
    )


# ---------------------------------------------------------------------------- #
# Faster R-CNN models
# ---------------------------------------------------------------------------- #


def VGG16_faster_rcnn(model):
    """Deprecated hardcoded Faster R-CNN definition for VGG16.

    Asserts that cfg.MODEL.FASTER_RCNN is set, then delegates to
    build_generic_detection_model with the VGG16 conv5 body and RoI FC head
    builders.
    """
    assert cfg.MODEL.FASTER_RCNN
    conv_body_func = VGG16.add_VGG16_conv5_body
    roi_head_func = VGG16.add_VGG16_roi_fc_head
    return build_generic_detection_model(model, conv_body_func, roi_head_func)


def ResNet50_faster_rcnn(model):
    """Deprecated hardcoded Faster R-CNN definition for ResNet50.

    Asserts that cfg.MODEL.FASTER_RCNN is set, then delegates to
    build_generic_detection_model with the ResNet50 conv4 body and the ResNet
    RoI conv5 head builders.
    """
    assert cfg.MODEL.FASTER_RCNN
    conv_body_func = ResNet.add_ResNet50_conv4_body
    roi_head_func = ResNet.add_ResNet_roi_conv5_head
    return build_generic_detection_model(model, conv_body_func, roi_head_func)


def ResNet101_faster_rcnn(model):
    """Deprecated hardcoded Faster R-CNN definition for ResNet101.

    Asserts that cfg.MODEL.FASTER_RCNN is set, then delegates to
    build_generic_detection_model with the ResNet101 conv4 body and the ResNet
    RoI conv5 head builders.
    """
    assert cfg.MODEL.FASTER_RCNN
    conv_body_func = ResNet.add_ResNet101_conv4_body
    roi_head_func = ResNet.add_ResNet_roi_conv5_head
    return build_generic_detection_model(model, conv_body_func, roi_head_func)


# ---------------------------------------------------------------------------- #
# R-FCN models
# ---------------------------------------------------------------------------- #


def ResNet50_rfcn(model):
    """Deprecated hardcoded R-FCN definition for ResNet50.

    Delegates to build_generic_rfcn_model with the ResNet50 conv5 body builder
    and dim_reduce=1024.
    """
    conv_body_func = ResNet.add_ResNet50_conv5_body
    return build_generic_rfcn_model(model, conv_body_func, dim_reduce=1024)


def ResNet101_rfcn(model):
    """Deprecated hardcoded R-FCN definition for ResNet101.

    Delegates to build_generic_rfcn_model with the ResNet101 conv5 body
    builder and dim_reduce=1024.
    """
    conv_body_func = ResNet.add_ResNet101_conv5_body
    return build_generic_rfcn_model(model, conv_body_func, dim_reduce=1024)


================================================
FILE: detectron/modeling/name_compat.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Handle mapping from old network building function names to new names.

Flexible network configuration is achieved by specifying the function name that
builds a network module (e.g., the name of the conv backbone or the mask roi
head). However we may wish to change names over time without breaking previous
config files. This module provides backwards naming compatibility by providing
a mapping from the old name to the new name.

When renaming functions, it's generally a good idea to codemod existing yaml
config files. An easy way to batch edit, by example, is a shell command like

$ find . -name "*.yaml" -exec sed -i -e \
   's/head_builder\.add_roi_2mlp_head/fast_rcnn_heads.add_roi_2mlp_head/g' {} \;

to perform the renaming:
  head_builder.add_roi_2mlp_head => fast_rcnn_heads.add_roi_2mlp_head
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals


# Maps an old network building function name to its current name
_RENAME = {
    # Removed "ResNet_" from the name because it wasn't relevant
    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs':
        'mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs',
    # Removed "ResNet_" from the name because it wasn't relevant
    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up':
        'mask_rcnn_heads.mask_rcnn_fcn_head_v1up',
    # Removed "ResNet_" from the name because it wasn't relevant
    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0upshare':
        'mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare',
    # Removed "ResNet_" from the name because it wasn't relevant
    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0up':
        'mask_rcnn_heads.mask_rcnn_fcn_head_v0up',
    # Removed head_builder module in favor of the more specific fast_rcnn name
    'head_builder.add_roi_2mlp_head':
        'fast_rcnn_heads.add_roi_2mlp_head',
}


def get_new_name(func_name):
    """Return the current name for `func_name`.

    Names listed in _RENAME are translated to their replacement; any other
    name is returned unchanged.
    """
    return _RENAME.get(func_name, func_name)


================================================
FILE: detectron/modeling/optimizer.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Optimization operator graph construction."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging

from caffe2.python import muji

from detectron.core.config import cfg
import detectron.utils.c2 as c2_utils

logger = logging.getLogger(__name__)


def build_data_parallel_model(model, single_gpu_build_func):
    """Build a data parallel model from a single-GPU build function.

    Three cases, checked in this order:
      * model.only_build_forward_pass: the build function is called once,
        without any extra scoping.
      * training: forward graph per GPU, gradient operators, allreduce when
        cfg.NUM_GPUS > 1, then the parameter update ops per GPU.
      * otherwise (testing): the build function is called once under the scope
        of model.target_gpu_id.
    """
    if model.only_build_forward_pass:
        single_gpu_build_func(model)
        return

    if not model.train:
        # Test-time network operates on single GPU
        # Test-time parallelism is implemented through multiprocessing
        with c2_utils.NamedCudaScope(model.target_gpu_id):
            single_gpu_build_func(model)
        return

    loss_grads = _build_forward_graph(model, single_gpu_build_func)
    # Add backward pass on all GPUs
    model.AddGradientOperators(loss_grads)
    if cfg.NUM_GPUS > 1:
        _add_allreduce_graph(model)
    for device_id in range(cfg.NUM_GPUS):
        # After allreduce, all GPUs perform SGD updates on their identical
        # params and gradients in parallel
        with c2_utils.NamedCudaScope(device_id):
            add_single_gpu_param_update_ops(model, device_id)


def _build_forward_graph(model, single_gpu_build_func):
    """Construct the forward graph on each GPU.

    Returns one dict holding the loss gradients returned by the build function
    on every GPU.
    """
    merged_loss_grads = {}
    for device_id in range(cfg.NUM_GPUS):
        # Name and device scoping keeps the per-GPU copies apart
        with c2_utils.NamedCudaScope(device_id):
            per_gpu_loss_grads = single_gpu_build_func(model)
            merged_loss_grads.update(per_gpu_loss_grads)
    return merged_loss_grads


def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients.

    Needed when training with more than 1 GPU, so that the per-GPU gradients
    of each parameter get reduced across GPUs.
    """
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The parameters are replicated on each GPU, so this is the number of
    # distinct parameter blobs (i.e., the number of blobs on one GPU)
    stride = len(all_params) // cfg.NUM_GPUS
    with c2_utils.CudaScope(0):
        for offset in range(stride):
            # Every stride-th entry is the same parameter on the next GPU
            grads = [
                model.param_to_grad[param]
                for param in all_params[offset::stride]
            ]
            if not grads:
                continue
            if cfg.USE_NCCL:
                model.net.NCCLAllreduce(grads, grads)
            else:
                muji.Allreduce(model.net, grads, reduced_affix='')


def add_single_gpu_param_update_ops(model, gpu_id):
    """Add the momentum SGD update ops for the trainable params of one GPU.

    Biases get their gradient scaled by 2 and no weight decay, GroupNorm
    params use cfg.SOLVER.WEIGHT_DECAY_GN, and all remaining params use
    cfg.SOLVER.WEIGHT_DECAY when it is positive.
    """

    def _scalar_blob(name, value):
        # One-element constant blob created in the param init net
        return model.param_init_net.ConstantFill(
            [], name, shape=[1], value=value
        )

    # Learning rate of 0 is a dummy value to be set properly at the
    # start of training
    lr_blob = _scalar_blob('lr', 0.0)
    one_blob = _scalar_blob('one', 1.0)
    wd_blob = _scalar_blob('wd', cfg.SOLVER.WEIGHT_DECAY)
    # weight decay of GroupNorm's parameters
    wd_gn_blob = _scalar_blob('wd_gn', cfg.SOLVER.WEIGHT_DECAY_GN)

    for param in model.TrainableParams(gpu_id=gpu_id):
        logger.debug('param ' + str(param) + ' will be updated')
        grad = model.param_to_grad[param]
        # Initialize momentum vector
        momentum = model.param_init_net.ConstantFill(
            [param], param + '_momentum', value=0.0
        )
        if param in model.biases:
            # Special treatment for biases (mainly to match historical impl.
            # details):
            # (1) Do not apply weight decay
            # (2) Use a 2x higher learning rate
            model.Scale(grad, grad, scale=2.0)
        elif param in model.gn_params:
            # Special treatment for GroupNorm's parameters
            model.WeightedSum([grad, one_blob, param, wd_gn_blob], grad)
        elif cfg.SOLVER.WEIGHT_DECAY > 0:
            # Apply weight decay to non-bias weights
            model.WeightedSum([grad, one_blob, param, wd_blob], grad)
        # Update the gradient and momentum blobs in place
        model.net.MomentumSGDUpdate(
            [grad, momentum, lr_blob, param],
            [grad, momentum, param],
            momentum=cfg.SOLVER.MOMENTUM
        )


================================================
FILE: detectron/modeling/retinanet_heads.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""RetinaNet model heads and losses. See: https://arxiv.org/abs/1708.02002."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from detectron.core.config import cfg
import detectron.utils.blob as blob_utils


def get_retinanet_bias_init(model):
    """Return the bias initializer for the class prediction conv ops.

    The bias is chosen such that at the start of training all locations are
    predicted to be background with high probability
    (e.g., ~0.99 = 1 - cfg.RETINANET.PRIOR_PROB). See the Focal Loss paper for
    details.

    The result is an (operator name, kwargs) tuple: a 'GivenTensorFill' with
    explicit values when cfg.RETINANET.SOFTMAX is set, a 'ConstantFill'
    otherwise.
    """
    prior_prob = cfg.RETINANET.PRIOR_PROB
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)

    if not cfg.RETINANET.SOFTMAX:
        # Per-class sigmoid (binary classification) case
        fill_args = {'value': -np.log((1 - prior_prob) / prior_prob)}
        return ('ConstantFill', fill_args)

    # Multiclass softmax case: only class 0 gets a non-zero bias
    per_anchor_bias = np.zeros((model.num_classes, 1), dtype=np.float32)
    per_anchor_bias[0] = np.log(
        (model.num_classes - 1) * (1 - prior_prob) / (prior_prob)
    )
    # Repeat the per-anchor column once for every scale / aspect ratio pair
    num_anchors = scales_per_octave * num_aspect_ratios
    stacked_bias = np.vstack([per_anchor_bias] * num_anchors)
    fill_args = {'values': stacked_bias.astype(dtype=np.float32)}
    return ('GivenTensorFill', fill_args)


def _add_retinanet_tower(model, blob_in, prefix, lvl, k_min, dim):
    """Add the stack of 3x3 conv + ReLU layers of one tower on one level.

    The convs added for level `k_min` are regular Conv ops with their own
    initializers; every other level adds ConvShared ops that refer to the
    `k_min` weight and bias blobs by name.

    Returns the output blob of the last ReLU, or `blob_in` unchanged when
    cfg.RETINANET.NUM_CONVS is 0.
    """
    blob = blob_in
    for nconv in range(cfg.RETINANET.NUM_CONVS):
        conv_name = '{}_n{}_fpn{}'.format(prefix, nconv, lvl)
        if lvl == k_min:
            blob_out = model.Conv(
                blob,
                conv_name,
                dim,
                dim,
                3,
                stride=1,
                pad=1,
                weight_init=('GaussianFill', {
                    'std': 0.01
                }),
                bias_init=('ConstantFill', {
                    'value': 0.
                })
            )
        else:
            shared_name = '{}_n{}_fpn{}'.format(prefix, nconv, k_min)
            blob_out = model.ConvShared(
                blob,
                conv_name,
                dim,
                dim,
                3,
                stride=1,
                pad=1,
                weight=shared_name + '_w',
                bias=shared_name + '_b'
            )
        # ReLU is applied in place on the conv output
        blob = model.Relu(blob_out, blob_out)
    return blob


def add_fpn_retinanet_outputs(model, blobs_in, dim_in, spatial_scales):
    """RetinaNet head: add class and box prediction ops on every FPN level.

    For classification and box regression we can choose to share one conv
    tower or to use separate towers (cfg.RETINANET.SHARE_CLS_BBOX_TOWER).
    `bbox_feat_list` stores, per level, the feature blob used for bbox
    prediction: the cls tower output when the tower is shared, otherwise the
    output of an independent bbox tower.

    Per level `lvl` the following blobs are produced:
      * 'retnet_cls_pred_fpn{lvl}': class logits
      * 'retnet_cls_prob_fpn{lvl}': class probabilities (inference only)
      * 'retnet_bbox_pred_fpn{lvl}': box regression outputs

    Args:
        model: model helper the ops are added to.
        blobs_in: per-level input blobs in reversed order, i.e. index 0 is
            level cfg.FPN.RPN_MAX_LEVEL; must hold exactly one blob per level.
        dim_in: used as both the input and output channel count of every
            tower conv and as the input channel count of the prediction convs.
        spatial_scales: not used by this function.
    """
    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    A = len(cfg.RETINANET.ASPECT_RATIOS) * cfg.RETINANET.SCALES_PER_OCTAVE

    # compute init for bias
    bias_init = get_retinanet_bias_init(model)

    assert len(blobs_in) == k_max - k_min + 1, \
        'Expected one input blob per pyramid level'
    bbox_feat_list = []
    cls_pred_dim = (
        model.num_classes if cfg.RETINANET.SOFTMAX else (model.num_classes - 1)
    )
    bbox_regr_dim = (
        4 * (model.num_classes - 1) if cfg.RETINANET.CLASS_SPECIFIC_BBOX else 4
    )

    # ==========================================================================
    # classification tower with logits and prob prediction
    # ==========================================================================
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        # With NUM_CONVS == 0 the tower is empty and bl_feat is bl_in itself;
        # this used to fail because bl_feat was only bound inside the loop
        bl_feat = _add_retinanet_tower(
            model, bl_in, 'retnet_cls_conv', lvl, k_min, dim_in
        )
        # cls tower stack convolution ends. Add the logits layer now
        if lvl == k_min:
            retnet_cls_pred = model.Conv(
                bl_feat,
                'retnet_cls_pred_fpn{}'.format(lvl),
                dim_in,
                cls_pred_dim * A,
                3,
                pad=1,
                stride=1,
                weight_init=('GaussianFill', {
                    'std': 0.01
                }),
                bias_init=bias_init
            )
        else:
            retnet_cls_pred = model.ConvShared(
                bl_feat,
                'retnet_cls_pred_fpn{}'.format(lvl),
                dim_in,
                cls_pred_dim * A,
                3,
                pad=1,
                stride=1,
                weight='retnet_cls_pred_fpn{}_w'.format(k_min),
                bias='retnet_cls_pred_fpn{}_b'.format(k_min)
            )
        if not model.train:
            # Probabilities are only added to the inference graph
            if cfg.RETINANET.SOFTMAX:
                model.net.GroupSpatialSoftmax(
                    retnet_cls_pred,
                    'retnet_cls_prob_fpn{}'.format(lvl),
                    num_classes=cls_pred_dim
                )
            else:
                model.net.Sigmoid(
                    retnet_cls_pred, 'retnet_cls_prob_fpn{}'.format(lvl)
                )
        if cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
            bbox_feat_list.append(bl_feat)

    # ==========================================================================
    # bbox tower if not sharing features with the classification tower
    # ==========================================================================
    if not cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
        for lvl in range(k_min, k_max + 1):
            bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
            bbox_feat_list.append(
                _add_retinanet_tower(
                    model, bl_in, 'retnet_bbox_conv', lvl, k_min, dim_in
                )
            )

    # Depending on the features [shared/separate] for bbox, add prediction layer
    for lvl, bl_feat in zip(range(k_min, k_max + 1), bbox_feat_list):
        bbox_pred = 'retnet_bbox_pred_fpn{}'.format(lvl)
        if lvl == k_min:
            model.Conv(
                bl_feat,
                bbox_pred,
                dim_in,
                bbox_regr_dim * A,
                3,
                pad=1,
                stride=1,
                weight_init=('GaussianFill', {
                    'std': 0.01
                }),
                bias_init=('ConstantFill', {
                    'value': 0.
                })
            )
        else:
            model.ConvShared(
                bl_feat,
                bbox_pred,
                dim_in,
                bbox_regr_dim * A,
                3,
                pad=1,
                stride=1,
                weight='retnet_bbox_pred_fpn{}_w'.format(k_min),
                bias='retnet_bbox_pred_fpn{}_b'.format(k_min)
            )


def add_fpn_retinanet_losses(model):
    """Add the RetinaNet box regression and focal losses for every FPN level.

    All bbox losses are added first, then all classification losses. The loss
    blobs are registered with model.AddLosses and the dict of loss gradients
    obtained from blob_utils.get_loss_gradients is returned.
    """
    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid

    gradients = []
    losses = []

    model.AddMetrics(['retnet_fg_num', 'retnet_bg_num'])
    # ==========================================================================
    # bbox regression loss - SelectSmoothL1Loss for multiple anchors at a location
    # ==========================================================================
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        bbox_loss_name = 'retnet_loss_bbox_' + suffix
        bbox_inputs = [
            'retnet_bbox_pred_' + suffix,
            'retnet_roi_bbox_targets_' + suffix,
            'retnet_roi_fg_bbox_locs_' + suffix,
            'retnet_fg_num',
        ]
        bbox_loss = model.net.SelectSmoothL1Loss(
            bbox_inputs,
            bbox_loss_name,
            beta=cfg.RETINANET.BBOX_REG_BETA,
            scale=model.GetLossScale() * cfg.RETINANET.BBOX_REG_WEIGHT
        )
        gradients.append(bbox_loss)
        losses.append(bbox_loss_name)

    # ==========================================================================
    # cls loss - depends on softmax/sigmoid outputs
    # ==========================================================================
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        focal_loss_name = 'fl_{}'.format(suffix)
        cls_inputs = [
            'retnet_cls_pred_' + suffix,
            'retnet_cls_labels_' + suffix,
            'retnet_fg_num',
        ]
        if cfg.RETINANET.SOFTMAX:
            # The second output (the probabilities) is not used any further
            cls_focal_loss, _ = model.net.SoftmaxFocalLoss(
                cls_inputs,
                [focal_loss_name, 'retnet_prob_{}'.format(suffix)],
                gamma=cfg.RETINANET.LOSS_GAMMA,
                alpha=cfg.RETINANET.LOSS_ALPHA,
                scale=model.GetLossScale(),
                num_classes=model.num_classes
            )
        else:
            cls_focal_loss = model.net.SigmoidFocalLoss(
                cls_inputs,
                [focal_loss_name],
                gamma=cfg.RETINANET.LOSS_GAMMA,
                alpha=cfg.RETINANET.LOSS_ALPHA,
                scale=model.GetLossScale(),
                num_classes=model.num_classes - 1
            )
        gradients.append(cls_focal_loss)
        losses.append(focal_loss_name)

    loss_gradients = {}
    loss_gradients.update(blob_utils.get_loss_gradients(model, gradients))
    model.AddLosses(losses)
    return loss_gradients


================================================
FILE: detectron/modeling/rfcn_heads.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg
from detectron.utils.c2 import const_fill
from detectron.utils.c2 import gauss_fill


# ---------------------------------------------------------------------------- #
# R-FCN outputs and losses
# ---------------------------------------------------------------------------- #

def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add the R-FCN position-sensitive classification and bbox heads.

    When `dim_reduce` is not None, a 1x1 conv + ReLU first reduces `blob_in`
    from `dim_in` to `dim_reduce` channels. Two 1x1 convs ('conv_cls' and
    'conv_bbox_pred') are then added, each followed by PSRoIPool over the
    'rois' blob and an average pool over the position-sensitive grid.

    Blobs produced: 'cls_score' (plus 'cls_prob' when not training) and
    'bbox_pred'. Nothing is returned.
    """
    grid_size = cfg.RFCN.PS_GRID_SIZE

    def conv1x1(src, dst, channels_in, channels_out):
        # Every conv in this head is a 1x1, stride 1, unpadded convolution
        # with the same weight/bias initialization.
        return model.Conv(
            src,
            dst,
            channels_in,
            channels_out,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )

    if dim_reduce is not None:
        # Optional dim reduction
        reduced = conv1x1(blob_in, 'conv_dim_reduce', dim_in, dim_reduce)
        blob_in = model.Relu(reduced, reduced)
        dim_in = dim_reduce

    # Classification conv: one group of class channels per grid cell
    conv1x1(blob_in, 'conv_cls', dim_in, model.num_classes * grid_size**2)

    # Bounding-box regression conv
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    bbox_dim = 4 * num_bbox_reg_classes
    conv1x1(blob_in, 'conv_bbox_pred', dim_in, bbox_dim * grid_size**2)

    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'], ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=grid_size,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool('psroipooled_cls', 'cls_score_4d', kernel=grid_size)
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        # Class probabilities are only added for inference
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=grid_size,
        output_dim=bbox_dim,
        spatial_scale=spatial_scale
    )
    model.AveragePool('psroipooled_bbox', 'bbox_pred', kernel=grid_size)


================================================
FILE: detectron/modeling/rpn_heads.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg
from detectron.modeling.generate_anchors import generate_anchors
from detectron.utils.c2 import const_fill
from detectron.utils.c2 import gauss_fill
import detectron.modeling.FPN as FPN
import detectron.utils.blob as blob_utils


# ---------------------------------------------------------------------------- #
# RPN and Faster R-CNN outputs and losses
# ---------------------------------------------------------------------------- #

def add_generic_rpn_outputs(model, blob_in, dim_in, spatial_scale_in):
    """Add RPN outputs (objectness classification and bounding box regression)
    to an RPN model, dispatching to the FPN or the single-scale implementation.

    Returns the loss gradients produced by the matching loss function when
    `model.train` is true, and None otherwise.
    """
    if not cfg.FPN.FPN_ON:
        # Not using FPN, add RPN to a single scale
        add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale_in)
        return add_single_scale_rpn_losses(model) if model.train else None

    # Delegate to the FPN module
    FPN.add_fpn_rpn_outputs(model, blob_in, dim_in, spatial_scale_in)
    if cfg.MODEL.FASTER_RCNN:
        # CollectAndDistributeFpnRpnProposals also labels proposals when in
        # training mode
        model.CollectAndDistributeFpnRpnProposals()
    return FPN.add_fpn_rpn_losses(model) if model.train else None


def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN).

    Adds a 3x3 conv + ReLU hidden layer ('conv_rpn') on top of `blob_in`,
    followed by two 1x1 convs producing 'rpn_cls_logits' (one channel per
    anchor) and 'rpn_bbox_pred' (four channels per anchor). Proposal
    generation and proposal labeling ops are added depending on the
    train/inference mode and on cfg.MODEL.FASTER_RCNN.
    """
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS
    )
    num_anchors = anchors.shape[0]
    # The hidden representation keeps the input channel count
    dim_out = dim_in

    def add_conv(src, dst, channels_in, channels_out, kernel, pad):
        # All RPN convs share stride and initialization
        model.Conv(
            src,
            dst,
            channels_in,
            channels_out,
            kernel=kernel,
            pad=pad,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )

    # RPN hidden representation
    add_conv(blob_in, 'conv_rpn', dim_in, dim_out, kernel=3, pad=1)
    model.Relu('conv_rpn', 'conv_rpn')
    # Proposal classification scores
    add_conv('conv_rpn', 'rpn_cls_logits', dim_in, num_anchors, kernel=1, pad=0)
    # Proposal bbox regression deltas
    add_conv(
        'conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors, kernel=1, pad=0
    )

    faster_rcnn = cfg.MODEL.FASTER_RCNN
    if faster_rcnn or not model.train:
        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  OR
        #  2) training for Faster R-CNN
        # Otherwise (== training for RPN only), proposals are not needed
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale
        )

    if not faster_rcnn:
        return
    if model.train:
        # Add op that generates training labels for in-network RPN proposals
        model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
    else:
        # Alias rois to rpn_rois for inference
        model.net.Alias('rpn_rois', 'rois')


def add_single_scale_rpn_losses(model):
    """Add losses for a single scale RPN model (i.e., no FPN).

    Adds the 'loss_rpn_cls' and 'loss_rpn_bbox' loss blobs, registers them
    with the model and returns their loss gradients.
    """
    # Spatially narrow the full-sized RPN label arrays to match the feature map
    # shape
    model.net.SpatialNarrowAs(
        ['rpn_labels_int32_wide', 'rpn_cls_logits'], 'rpn_labels_int32'
    )
    for suffix in ('targets', 'inside_weights', 'outside_weights'):
        narrowed = 'rpn_bbox_' + suffix
        model.net.SpatialNarrowAs(
            [narrowed + '_wide', 'rpn_bbox_pred'], narrowed
        )

    # Objectness classification loss
    cls_loss = model.net.SigmoidCrossEntropyLoss(
        ['rpn_cls_logits', 'rpn_labels_int32'],
        'loss_rpn_cls',
        scale=model.GetLossScale()
    )
    # Box regression loss
    bbox_inputs = [
        'rpn_bbox_pred', 'rpn_bbox_targets', 'rpn_bbox_inside_weights',
        'rpn_bbox_outside_weights'
    ]
    bbox_loss = model.net.SmoothL1Loss(
        bbox_inputs,
        'loss_rpn_bbox',
        beta=1. / 9.,
        scale=model.GetLossScale()
    )

    loss_gradients = blob_utils.get_loss_gradients(
        model, [cls_loss, bbox_loss]
    )
    model.AddLosses(['loss_rpn_cls', 'loss_rpn_bbox'])
    return loss_gradients


================================================
FILE: detectron/ops/__init__.py
================================================


================================================
FILE: detectron/ops/collect_and_distribute_fpn_rpn_proposals.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from detectron.core.config import cfg
from detectron.datasets import json_dataset
from detectron.datasets import roidb as roidb_utils
import detectron.modeling.FPN as fpn
import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data
import detectron.utils.blob as blob_utils


class CollectAndDistributeFpnRpnProposalsOp:
    """Python op that merges per-level RPN proposals and redistributes them.

    In training mode the merged proposals are additionally labeled by reusing
    the Fast R-CNN data loader code.
    """

    def __init__(self, train):
        # True when the op is part of a training net
        self._train = train

    def forward(self, inputs, outputs):
        """See modeling.detector.CollectAndDistributeFpnRpnProposals for
        inputs/outputs documentation.
        """
        # inputs is
        # [rpn_rois_fpn2, ..., rpn_rois_fpn6,
        #  rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
        # If training with Faster R-CNN, then inputs will additionally include
        #  + [roidb, im_info]
        rois = collect(inputs, self._train)

        if not self._train:
            # For inference we have a special code path that avoids some data
            # loader overhead
            distribute(rois, None, outputs, self._train)
            return

        # During training we reuse the data loader code. We populate roidb
        # entries on the fly using the rois generated by RPN.
        # im_info: [[im_height, im_width, im_scale], ...]
        im_scales = inputs[-1].data[:, 2]
        roidb = blob_utils.deserialize(inputs[-2].data)
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals.
        # This choice should be investigated in the future (it likely does
        # not matter).
        json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
        roidb_utils.add_bbox_regression_targets(roidb)

        # Compute training labels for the RPN proposals; also handles
        # distributing the proposals over FPN levels
        blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names()
        blobs = {name: [] for name in blob_names}
        fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)
        # Outputs are filled in the order given by the blob names
        for out_idx, name in enumerate(blob_names):
            blob_utils.py_op_copy_blob(blobs[name], outputs[out_idx])


def collect(inputs, is_training):
    """Merge the per-level RPN proposals and keep the highest scoring ones.

    `inputs` holds one roi blob per RPN level followed by one score blob per
    RPN level; in training mode two extra trailing inputs are present and are
    ignored here. Returns the rois of the top RPN_POST_NMS_TOP_N scores,
    ordered by descending score.
    """
    mode_cfg = cfg['TRAIN' if is_training else 'TEST']
    post_nms_topN = mode_cfg.RPN_POST_NMS_TOP_N
    num_lvls = cfg.FPN.RPN_MAX_LEVEL - cfg.FPN.RPN_MIN_LEVEL + 1

    roi_inputs = inputs[:num_lvls]
    score_inputs = inputs[num_lvls:]
    if is_training:
        # Drop the two trailing non-score inputs
        score_inputs = score_inputs[:-2]

    # rois are in [[batch_idx, x0, y0, x1, y1], ...] format
    # Combine predictions across all levels and retain the top scoring
    all_rois = np.concatenate([blob.data for blob in roi_inputs])
    all_scores = np.concatenate([blob.data for blob in score_inputs]).squeeze()
    top_inds = np.argsort(-all_scores)[:post_nms_topN]
    return all_rois[top_inds, :]


def distribute(rois, label_blobs, outputs, train):
    """Write `rois` and their per-FPN-level split into `outputs`.

    outputs[0] receives all rois, outputs[1:-1] receive the rois mapped to
    each level from FPN.ROI_MIN_LEVEL to FPN.ROI_MAX_LEVEL, and outputs[-1]
    receives the int32 permutation that restores the original roi order from
    the level-concatenated order. `label_blobs` and `train` are not used.

    To understand the output blob order see return value of
    detectron.roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False)
    """
    lvl_min = cfg.FPN.ROI_MIN_LEVEL
    lvl_max = cfg.FPN.ROI_MAX_LEVEL
    lvls = fpn.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max)

    all_rois_out = outputs[0]
    all_rois_out.reshape(rois.shape)
    all_rois_out.data[...] = rois

    # Create new roi blobs for each FPN level
    # (See: modeling.FPN.add_multilevel_roi_blobs which is similar but annoying
    # to generalize to support this particular case.)
    # The empty float seed keeps the concatenated index array's dtype the same
    # as when indices are appended level by level.
    idx_chunks = [np.empty((0, ))]
    for slot, lvl in enumerate(range(lvl_min, lvl_max + 1), 1):
        idx_lvl = np.where(lvls == lvl)[0]
        lvl_rois = rois[idx_lvl, :]
        lvl_out = outputs[slot]
        lvl_out.reshape(lvl_rois.shape)
        lvl_out.data[...] = lvl_rois
        idx_chunks.append(idx_lvl)
    rois_idx_order = np.concatenate(idx_chunks)
    rois_idx_restore = np.argsort(rois_idx_order)
    blob_utils.py_op_copy_blob(rois_idx_restore.astype(np.int32), outputs[-1])


================================================
FILE: detectron/ops/generate_proposal_labels.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging

from detectron.datasets import json_dataset
from detectron.datasets import roidb as roidb_utils
from detectron.utils import blob as blob_utils
import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data

logger = logging.getLogger(__name__)


class GenerateProposalLabelsOp:
    """Python op that labels in-network RPN proposals for training."""

    def forward(self, inputs, outputs):
        """See modeling.detector.GenerateProposalLabels for inputs/outputs
        documentation.
        """
        # During training we reuse the data loader code. We populate roidb
        # entries on the fly using the rois generated by RPN.
        rois_blob, roidb_blob, im_info_blob = inputs[0], inputs[1], inputs[2]
        rois = rois_blob.data
        roidb = blob_utils.deserialize(roidb_blob.data)
        # im_info: [[im_height, im_width, im_scale], ...]
        im_scales = im_info_blob.data[:, 2]
        blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names()

        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals.
        # This choice should be investigated in the future (it likely does
        # not matter).
        json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
        roidb_utils.add_bbox_regression_targets(roidb)

        blobs = {name: [] for name in blob_names}
        fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)
        # Outputs are filled in the order given by the blob names
        for out_idx, name in enumerate(blob_names):
            blob_utils.py_op_copy_blob(blobs[name], outputs[out_idx])


================================================
FILE: detectron/ops/generate_proposals.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import numpy as np

from detectron.core.config import cfg
import detectron.utils.boxes as box_utils


class GenerateProposalsOp:
    """Output object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    See comment in utils/boxes:bbox_transform_inv for details about the
    optional `reg_weights` parameter.
    """

    def __init__(self, anchors, spatial_scale, train, reg_weights=(1.0, 1.0, 1.0, 1.0)):
        self._anchors = anchors
        self._num_anchors = self._anchors.shape[0]
        # Stride of the feature map relative to the network input
        self._feat_stride = 1. / spatial_scale
        self._train = train
        self._reg_weights = reg_weights

    def forward(self, inputs, outputs):
        """See modeling.detector.GenerateProposals for inputs/outputs
        documentation.
        """
        # 1. for each location i in a (H, W) grid:
        #      generate A anchor boxes centered on cell i
        #      apply predicted bbox deltas to each of the A anchors at cell i
        # 2. clip predicted boxes to image
        # 3. remove predicted boxes with either height or width < threshold
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take the top pre_nms_topN proposals before NMS
        # 6. apply NMS with a loose threshold (0.7) to the remaining proposals
        # 7. take after_nms_topN proposals after NMS
        # 8. return the top proposals

        # predicted probability of fg object for each RPN anchor
        scores = inputs[0].data
        # predicted anchor transformations
        bbox_deltas = inputs[1].data
        # input image (height, width, scale), in which scale is the scale factor
        # applied to the original dataset image to get the network input image
        im_info = inputs[2].data

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]
        # Enumerate all shifted positions on the (H, W) grid
        shift_x, shift_y = np.meshgrid(
            np.arange(0, width) * self._feat_stride,
            np.arange(0, height) * self._feat_stride,
            copy=False
        )
        # Convert to (K, 4), K=H*W, where the columns are (dx, dy, dx, dy)
        # shift pointing to each grid location
        flat_x = shift_x.ravel()
        flat_y = shift_y.ravel()
        shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

        # Broadcast anchors over shifts to enumerate all anchors at all
        # positions in the (H, W) grid:
        #   - add A anchors of shape (1, A, 4) to
        #   - K shifts of shape (K, 1, 4) to get
        #   - all shifted anchors of shape (K, A, 4)
        #   - reshape to (K*A, 4) shifted anchors
        num_images = inputs[0].shape[0]
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = self._anchors[np.newaxis, :, :] + shifts[:, np.newaxis, :]
        all_anchors = all_anchors.reshape((K * A, 4))

        # Gather the per-image results and join them once at the end. The empty
        # float32 seeds fix the output shape/dtype when there are no images.
        roi_chunks = [np.empty((0, 5), dtype=np.float32)]
        prob_chunks = [np.empty((0, 1), dtype=np.float32)]
        for im_i in range(num_images):
            im_i_boxes, im_i_probs = self.proposals_for_one_image(
                im_info[im_i, :], all_anchors, bbox_deltas[im_i, :, :, :],
                scores[im_i, :, :, :]
            )
            # Prefix every box with the index of the image it belongs to
            batch_inds = im_i * np.ones(
                (im_i_boxes.shape[0], 1), dtype=np.float32
            )
            roi_chunks.append(np.hstack((batch_inds, im_i_boxes)))
            prob_chunks.append(im_i_probs)
        rois = np.concatenate(roi_chunks, axis=0)
        roi_probs = np.concatenate(prob_chunks, axis=0)

        rois_out = outputs[0]
        rois_out.reshape(rois.shape)
        rois_out.data[...] = rois
        if len(outputs) > 1:
            # The probabilities output is optional
            probs_out = outputs[1]
            probs_out.reshape(roi_probs.shape)
            probs_out.data[...] = roi_probs

    def proposals_for_one_image(
        self, im_info, all_anchors, bbox_deltas, scores
    ):
        """Compute the (proposals, scores) pair for a single image.

        Applies the steps listed in `forward`: top-k pre-NMS selection, box
        decoding, clipping, small-box filtering, and NMS with post-NMS top-k.
        """
        # Get mode-dependent configuration
        mode_cfg = cfg['TRAIN' if self._train else 'TEST']
        pre_nms_topN = mode_cfg.RPN_PRE_NMS_TOP_N
        post_nms_topN = mode_cfg.RPN_POST_NMS_TOP_N
        nms_thresh = mode_cfg.RPN_NMS_THRESH
        min_size = mode_cfg.RPN_MIN_SIZE

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if 0 < pre_nms_topN < len(scores):
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            top_inds = np.argpartition(
                -scores.squeeze(), pre_nms_topN
            )[:pre_nms_topN]
            order = top_inds[np.argsort(-scores[top_inds].squeeze())]
        else:
            order = np.argsort(-scores.squeeze())
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(
            all_anchors, bbox_deltas, self._reg_weights
        )

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores


def _filter_boxes(boxes, min_size, im_info):
    """Return the indices of boxes that are large enough and centered inside
    the image.

    A box is kept when both of its sides, measured in the original image
    coordinate system, are >= max(min_size, 1) and its center lies within
    the scaled input image described by `im_info` (height, width, scale).
    """
    im_height, im_width, im_scale = im_info[0], im_info[1], im_info[2]
    x_extent = boxes[:, 2] - boxes[:, 0]
    y_extent = boxes[:, 3] - boxes[:, 1]

    # Sides are measured in the original image coordinate system (this is
    # required to avoid "Negative Areas Found" assertions in other parts of
    # the code). To avoid numerical issues the minimum size is at least
    # 1 pixel in the original image.
    min_size = np.maximum(min_size, 1)
    wide_enough = x_extent / im_scale + 1 >= min_size
    tall_enough = y_extent / im_scale + 1 >= min_size

    # Proposal center is computed relative to the scaled input image
    x_ctr = boxes[:, 0] + (x_extent + 1) / 2.
    y_ctr = boxes[:, 1] + (y_extent + 1) / 2.
    inside = (x_ctr < im_width) & (y_ctr < im_height)

    return np.where(wide_enough & tall_enough & inside)[0]


================================================
FILE: detectron/ops/zero_even_op.cc
================================================
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "zero_even_op.h"

namespace caffe2 {

// CPU implementation: copies the 1D input to the output and zeroes every
// element at an even index. Always returns true; a non-1D input trips the
// CAFFE_ENFORCE below.
template <>
bool ZeroEvenOp<float, CPUContext>::RunOnDevice() {
  // Retrieve the input tensor.
  const auto& X = Input(0);
  CAFFE_ENFORCE(X.dim() == 1);

  // Initialize the output tensor to a copy of the input tensor.
  auto* Y = Output(0);
  Y->CopyFrom(X);

  // Set output elements at even indices to zero. The index is 64-bit so that
  // it cannot overflow before reaching numel() on very large tensors (a plain
  // `auto i = 0` deduces `int`).
  auto* Y_data = Y->mutable_data<float>();
  const int64_t num_elements = Y->numel();
  for (int64_t i = 0; i < num_elements; i += 2) {
    Y_data[i] = 0.0f;
  }

  return true;
}

// Register the float CPU implementation under the operator name "ZeroEven".
REGISTER_CPU_OPERATOR(ZeroEven, ZeroEvenOp<float, CPUContext>);

// Operator schema: exactly one input (X) and one output (Y), both documented
// as 1D tensors.
OPERATOR_SCHEMA(ZeroEven)
    .NumInputs(1)
    .NumOutputs(1)
    .Input(
        0,
        "X",
        "1D input tensor")
    .Output(
        0,
        "Y",
        "1D output tensor");

} // namespace caffe2


================================================
FILE: detectron/ops/zero_even_op.cu
================================================
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "caffe2/core/context_gpu.h"

#include "zero_even_op.h"

namespace caffe2 {

namespace {

// Writes `val` to data[0], data[2], data[4], ..., covering the first
// `num_even_inds` even positions of `data`.
template <typename T>
__global__ void SetEvenIndsToVal(size_t num_even_inds, T val, T* data) {
  CUDA_1D_KERNEL_LOOP(even_idx, num_even_inds) {
    // The k-th even position is element 2 * k.
    data[2 * even_idx] = val;
  }
}

} // namespace

// CUDA implementation: copies the 1D input to the output and launches a
// kernel that zeroes every element at an even index. Always returns true; a
// non-1D input trips the CAFFE_ENFORCE below.
template <>
bool ZeroEvenOp<float, CUDAContext>::RunOnDevice() {
  // Retrieve the input tensor. dim()/numel() are used (rather than the legacy
  // ndim()/size() accessors) to match the CPU implementation of this operator.
  const auto& X = Input(0);
  CAFFE_ENFORCE(X.dim() == 1);

  // Initialize the output tensor to a copy of the input tensor.
  auto* Y = Output(0);
  Y->CopyFrom(X);

  // Set output elements at even indices to zero.
  const auto output_size = Y->numel();

  // Skip the launch entirely for an empty tensor.
  if (output_size > 0) {
    // Number of even indices in [0, output_size): ceil(output_size / 2).
    size_t num_even_inds = output_size / 2 + output_size % 2;
    SetEvenIndsToVal<float>
        <<<CAFFE_GET_BLOCKS(num_even_inds),
           CAFFE_CUDA_NUM_THREADS,
           0,
           context_.cuda_stream()>>>(
            num_even_inds,
            0.0f,
            Y->mutable_data<float>());
  }

  return true;
}

// Register the float CUDA implementation under the operator name "ZeroEven".
REGISTER_CUDA_OPERATOR(ZeroEven, ZeroEvenOp<float, CUDAContext>);

} // namespace caffe2


================================================
FILE: detectron/ops/zero_even_op.h
================================================
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ZERO_EVEN_OP_H_
#define ZERO_EVEN_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"

namespace caffe2 {

/**
 * ZeroEven operator. Zeros elements at even indices of a 1D array.
 * Elements at odd indices are preserved.
 *
 * This toy operator is an example of a custom operator and may be a useful
 * reference for adding new custom operators to the Detectron codebase.
 */
template <typename T, class Context>
class ZeroEvenOp final : public Operator<Context> {
 public:
  // Introduce Operator<Context> helper members.
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  // Forwards the operator definition and workspace to the Operator base.
  ZeroEvenOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  // Only declared here; the definitions are explicit specializations for
  // <float, CPUContext> (zero_even_op.cc) and <float, CUDAContext>
  // (zero_even_op.cu).
  bool RunOnDevice() override;
};

} // namespace caffe2

#endif // ZERO_EVEN_OP_H_


================================================
FILE: detectron/roi_data/__init__.py
================================================


================================================
FILE: detectron/roi_data/data_utils.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Common utility functions for RPN and RetinaNet minibatch blob preparation.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import namedtuple
import logging
import numpy as np
import threading

from detectron.core.config import cfg
from detectron.modeling.generate_anchors import generate_anchors
import detectron.utils.boxes as box_utils

logger = logging.getLogger(__name__)


# Result record of get_field_of_anchors.
# octave and aspect fields are only used on RetinaNet. Octave corresponds to the
# scale of the anchor and aspect denotes which aspect ratio is used in the range
# of aspect ratios
FieldOfAnchors = namedtuple(
    'FieldOfAnchors', [
        'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size',
        'octave', 'aspect'
    ]
)

# Thread-local storage whose `cache` attribute memoizes get_field_of_anchors
_threadlocal_foa = threading.local()


def get_field_of_anchors(
    stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None
):
    """Return the (memoized) field of anchors for a feature map stride.

    The cell anchors produced by generate_anchors are replicated at every
    position of a square grid that is large enough to cover
    cfg.TRAIN.MAX_SIZE rounded up to a multiple of cfg.FPN.COARSEST_STRIDE.

    Arguments:
        stride: spacing between neighboring grid positions
        anchor_sizes: anchor sizes forwarded to generate_anchors
        anchor_aspect_ratios: aspect ratios forwarded to generate_anchors
        octave: optional tag stored as-is on the result
        aspect: optional tag stored as-is on the result

    Returns:
        A FieldOfAnchors tuple whose field_of_anchors is a float32 array of
        shape (K * A, 4), with K = field_size ** 2 grid positions and A
        anchors per cell.

    Results are cached per thread. The cache key does not include any cfg
    value, so changing cfg.TRAIN.MAX_SIZE or cfg.FPN.COARSEST_STRIDE after the
    first call does not invalidate entries that were already computed.
    """
    if not hasattr(_threadlocal_foa, 'cache'):
        _threadlocal_foa.cache = {}

    # octave and aspect are stored on the returned tuple, so they must be part
    # of the key; otherwise a call that only differs in those tags would get
    # back a cached record carrying the tags of an earlier call. Using a tuple
    # (instead of concatenating the strings) also keeps the individual
    # components from running into each other.
    cache_key = (
        str(stride), str(anchor_sizes), str(anchor_aspect_ratios),
        str(octave), str(aspect)
    )
    if cache_key in _threadlocal_foa.cache:
        return _threadlocal_foa.cache[cache_key]

    # Anchors at a single feature cell
    cell_anchors = generate_anchors(
        stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios
    )
    num_cell_anchors = cell_anchors.shape[0]

    # Generate canonical proposals from shifted anchors
    # Enumerate all shifted positions on the (H, W) grid
    fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil(
        cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE)
    )
    field_size = int(np.ceil(fpn_max_size / float(stride)))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.ravel()
    shift_y = shift_y.ravel()
    # One (x, y, x, y) offset row per grid position
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()

    # Broadcast anchors over shifts to enumerate all anchors at all positions
    # in the (H, W) grid:
    #   - add A cell anchors of shape (1, A, 4) to
    #   - K shifts of shape (K, 1, 4) to get
    #   - all shifted anchors of shape (K, A, 4)
    #   - reshape to (K*A, 4) shifted anchors
    A = num_cell_anchors
    K = shifts.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) + shifts.reshape((K, 1, 4))
    )
    field_of_anchors = field_of_anchors.reshape((K * A, 4))
    foa = FieldOfAnchors(
        field_of_anchors=field_of_anchors.astype(np.float32),
        num_cell_anchors=num_cell_anchors,
        stride=stride,
        field_size=field_size,
        octave=octave,
        aspect=aspect
    )
    _threadlocal_foa.cache[cache_key] = foa
    return foa


def unmap(data, count, inds, fill=0):
    """Scatter a subset of items back into an array covering the full set.

    `data` holds the values for the items selected by `inds`; the result has
    `count` entries along its first axis, with `data` written at `inds` and
    `fill` everywhere else. When `inds` already spans all `count` items,
    `data` is returned untouched.
    """
    if len(inds) == count:
        return data

    # Same trailing dimensions as data, but with the full item count up front
    full_shape = (count, ) + data.shape[1:]
    expanded = np.empty(full_shape, dtype=data.dtype)
    expanded.fill(fill)
    if len(data.shape) == 1:
        expanded[inds] = data
    else:
        expanded[inds, :] = data
    return expanded


def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Return float32 bounding-box regression targets for an image.

    The targets are whatever box_utils.bbox_transform_inv yields for the
    example rois, the ground-truth rois and the given weights; the result is
    only converted to float32 (without copying when it already is float32).
    """
    targets = box_utils.bbox_transform_inv(ex_rois, gt_rois, weights)
    return targets.astype(np.float32, copy=False)


================================================
FILE: detectron/roi_data/fast_rcnn.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Construct minibatches for Fast R-CNN training. Handles the minibatch blobs
that are specific to Fast R-CNN. Other blobs that are generic to RPN, etc.
are handled by their respective roi_data modules.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np
import numpy.random as npr

from detectron.core.config import cfg
import detectron.modeling.FPN as fpn
import detectron.roi_data.keypoint_rcnn as keypoint_rcnn_roi_data
import detectron.roi_data.mask_rcnn as mask_rcnn_roi_data
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils

logger = logging.getLogger(__name__)


def get_fast_rcnn_blob_names(is_training=True):
    """Return the ordered list of Fast R-CNN blob names.

    The list always starts with 'rois'. Training-only blobs (labels, bbox
    regression targets/weights and the optional mask / keypoint blobs) follow
    when is_training is True, and the per-FPN-level roi blobs come last when
    FPN multilevel rois are enabled in the config.
    """
    # rois blob: holds R regions of interest, each is a 5-tuple
    # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a
    # rectangle (x1, y1, x2, y2)
    names = ['rois']

    if is_training:
        names.extend([
            # R categorical labels in [0, ..., K] for K foreground classes
            # plus background
            'labels_int32',
            # R bounding-box regression targets with 4 targets per class
            'bbox_targets',
            # At most 4 targets per roi are active; this binary vector
            # specifies the subset of active targets
            'bbox_inside_weights',
            'bbox_outside_weights',
        ])
        if cfg.MODEL.MASK_ON:
            names.extend([
                # RoIs sampled for training the mask prediction branch.
                # Shape is (#masks, 5) in format (batch_idx, x1, y1, x2, y2).
                'mask_rois',
                # Binary labels for the RoIs specified in 'rois' indicating
                # if each RoI has a mask or not. Note that in some cases a
                # *bg* RoI will have an all -1 (ignore) mask associated with
                # it in the case that no fg RoIs can be sampled. Shape is
                # (batchsize).
                'roi_has_mask_int32',
                # Binary masks for the RoIs specified in 'mask_rois'. Shape
                # is (#fg, M * M) where M is the ground truth mask size.
                'masks_int32',
            ])
        if cfg.MODEL.KEYPOINTS_ON:
            names.extend([
                # RoIs sampled for training the keypoint prediction branch.
                # Shape is (#instances, 5) in format
                # (batch_idx, x1, y1, x2, y2).
                'keypoint_rois',
                # Index of keypoint in KRCNN.HEATMAP_SIZE**2 sized array.
                # Shape is (#instances). Used in SoftmaxWithLoss.
                'keypoint_locations_int32',
                # Weight assigned to each target in
                # 'keypoint_locations_int32'. Shape is (#instances). Used in
                # SoftmaxWithLoss.
                'keypoint_weights',
                # Optional normalization factor to use if
                # cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.
                'keypoint_loss_normalizer',
            ])

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        # Support for FPN multi-level rois without bbox reg isn't
        # implemented (... and may never be implemented)
        levels = range(cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)
        # Every entry below gets one blob per FPN level (same format as the
        # rois blob) followed by its index-restore blob
        multilevel_prefixes = ['rois']
        if is_training:
            if cfg.MODEL.MASK_ON:
                multilevel_prefixes.append('mask_rois')
            if cfg.MODEL.KEYPOINTS_ON:
                multilevel_prefixes.append('keypoint_rois')
        for prefix in multilevel_prefixes:
            names.extend(prefix + '_fpn' + str(lvl) for lvl in levels)
            names.append(prefix + '_idx_restore_int32')

    return names


def add_fast_rcnn_blobs(blobs, im_scales, roidb):
    """Populate `blobs` with the training blobs for Fast R-CNN style models.

    RoIs are sampled independently for every roidb entry, appended to the
    per-name lists in `blobs`, and the lists are then concatenated into
    arrays. Returns the minibatch validity flag: True unless keypoints are
    enabled, in which case the keypoint finalizer decides.
    """
    # Sample training RoIs per image and collect them in the blob lists
    for im_i, entry in enumerate(roidb):
        sampled = _sample_rois(entry, im_scales[im_i], im_i)
        for name, value in sampled.items():
            blobs[name].append(value)

    # Collapse every non-empty blob list into a single tensor
    for name, value in blobs.items():
        if not isinstance(value, list) or len(value) == 0:
            continue
        blobs[name] = np.concatenate(value)

    # Add FPN multilevel training RoIs, if configured
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois(blobs)

    # Final work and validity checks once the blobs of all minibatch images
    # have been collated
    if not cfg.MODEL.KEYPOINTS_ON:
        return True
    return keypoint_rcnn_roi_data.finalize_keypoint_minibatch(blobs, True)


def _sample_rois(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Arguments:
        roidb: a single roidb entry (despite the name); the keys read here are
            'max_overlaps', 'max_classes', 'boxes' and 'bbox_targets'
        im_scale: factor the sampled boxes are multiplied by
        batch_idx: index of the image within the minibatch; it becomes the
            first column of the 'rois' blob

    Returns:
        A dict with the blobs 'labels_int32', 'rois', 'bbox_targets',
        'bbox_inside_weights' and 'bbox_outside_weights', plus whatever the
        mask / keypoint helpers add when those heads are enabled. Foreground
        RoIs always precede background RoIs.
    """
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))
    max_overlaps = roidb['max_overlaps']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(
            fg_inds, size=fg_rois_per_this_image, replace=False
        )

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where(
        (max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
        (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)
    )[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(
            bg_inds, size=bg_rois_per_this_image, replace=False
        )

    # The indices that we're selecting (both fg and bg); fg indices come first
    keep_inds = np.append(fg_inds, bg_inds)
    # Label is the class each RoI has max overlap with
    sampled_labels = roidb['max_classes'][keep_inds]
    sampled_labels[fg_rois_per_this_image:] = 0  # Label bg RoIs with class 0
    sampled_boxes = roidb['boxes'][keep_inds]

    bbox_targets, bbox_inside_weights = _expand_bbox_targets(
        roidb['bbox_targets'][keep_inds, :]
    )
    # Outside weights are 1 wherever an inside weight is active, 0 elsewhere
    bbox_outside_weights = np.array(
        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype
    )

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois = sampled_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Fast R-CNN blobs
    blob_dict = dict(
        labels_int32=sampled_labels.astype(np.int32, copy=False),
        rois=sampled_rois,
        bbox_targets=bbox_targets,
        bbox_inside_weights=bbox_inside_weights,
        bbox_outside_weights=bbox_outside_weights
    )

    # Optionally add Mask R-CNN blobs
    # Note: the mask helper receives the unscaled sampled boxes plus im_scale
    if cfg.MODEL.MASK_ON:
        mask_rcnn_roi_data.add_mask_rcnn_blobs(
            blob_dict, sampled_boxes, roidb, im_scale, batch_idx
        )

    # Optionally add Keypoint R-CNN blobs
    # Note: this passes the configured per-image fg budget (fg_rois_per_image),
    # not the number of fg RoIs actually sampled above
    if cfg.MODEL.KEYPOINTS_ON:
        keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(
            blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
        )

    return blob_dict


def _expand_bbox_targets(bbox_target_data):
    """Expand the compact roidb regression targets into network form.

    The roidb stores one row per RoI as (class, 4 target values). The network
    expects a 4-of-4*K layout in which only the 4 columns belonging to the
    RoI's class are non-zero. The loss weights are expanded the same way.

    Returns:
        bbox_target_data (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        reg_class_count = 2  # bg and fg
    else:
        reg_class_count = cfg.MODEL.NUM_CLASSES

    class_column = bbox_target_data[:, 0]
    bbox_targets = blob_utils.zeros((class_column.size, 4 * reg_class_count))
    bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)

    # Only rows with a positive class receive targets and weights
    for row in np.where(class_column > 0)[0]:
        first_col = 4 * int(class_column[row])
        cols = slice(first_col, first_col + 4)
        bbox_targets[row, cols] = bbox_target_data[row, 1:]
        bbox_inside_weights[row, cols] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights


def _add_multilevel_rois(blobs):
    """Distribute the training RoI blobs over the FPN levels.

    By default training RoIs are added for a single feature map level only.
    With FPN, each rois blob ('rois' plus, when enabled, 'mask_rois' and
    'keypoint_rois') is split per level according to the level assignment
    heuristic (see: modeling.FPN.map_rois_to_fpn_levels), which adds blobs
    named like <rois_blob_name>_fpn<lvl>.
    """
    lvl_min = cfg.FPN.ROI_MIN_LEVEL
    lvl_max = cfg.FPN.ROI_MAX_LEVEL

    rois_blob_names = ['rois']
    if cfg.MODEL.MASK_ON:
        rois_blob_names.append('mask_rois')
    if cfg.MODEL.KEYPOINTS_ON:
        rois_blob_names.append('keypoint_rois')

    for rois_blob_name in rois_blob_names:
        rois = blobs[rois_blob_name]
        # Blob rois are in (batch_idx, x1, y1, x2, y2) format, so the box
        # coordinates used to pick a level live in columns 1:5
        target_lvls = fpn.map_rois_to_fpn_levels(
            rois[:, 1:5], lvl_min, lvl_max
        )
        fpn.add_multilevel_roi_blobs(
            blobs, rois_blob_name, rois, target_lvls, lvl_min, lvl_max
        )


================================================
FILE: detectron/roi_data/keypoint_rcnn.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Construct minibatches for Mask R-CNN training when keypoints are enabled.
Handles the minibatch blobs that are specific to training Mask R-CNN for
keypoint detection. Other blobs that are generic to RPN or Fast/er R-CNN are
handled by their respective roi_data modules.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np

from detectron.core.config import cfg
import detectron.utils.blob as blob_utils
import detectron.utils.keypoints as keypoint_utils

logger = logging.getLogger(__name__)


def add_keypoint_rcnn_blobs(
    blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.

    Arguments:
        blobs: dict that receives 'keypoint_rois', 'keypoint_locations_int32'
            and 'keypoint_weights'
        roidb: a single roidb entry; the keys read here are 'gt_classes',
            'max_overlaps', 'gt_keypoints', 'box_to_gt_ind_map' and 'boxes'
        fg_rois_per_image: upper bound on the number of keypoint RoIs sampled
        fg_inds: not used by this function; keypoint foreground RoIs are
            selected independently below
        im_scale: factor applied to the sampled boxes
        batch_idx: image index written into the first column of
            'keypoint_rois'
    """
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']

    # For every box, the roidb row of the gt it is mapped to
    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    # Row 2 of the keypoint array is treated as the visibility flag
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    # A box qualifies if at least one visible keypoint falls inside it
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible)
    )[0]

    # Subsample (without replacement) only when there are more candidates
    # than the per-image budget
    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False
        )

    # Indexing with an index array yields a copy, so the in-place scaling
    # further down does not touch roidb['boxes']
    sampled_fg_rois = roidb['boxes'][kp_fg_inds]
    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

    num_keypoints = gt_keypoints.shape[2]
    # Start from all -1 and copy in the keypoints of the matched gt
    sampled_keypoints = -np.ones(
        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
        dtype=gt_keypoints.dtype
    )
    for ii in range(len(sampled_fg_rois)):
        ind = box_to_gt_ind_map[ii]
        if ind >= 0:
            sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
            assert np.sum(sampled_keypoints[ii, 2, :]) > 0

    heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
        sampled_keypoints, sampled_fg_rois
    )

    # Flatten to one row per (roi, keypoint) pair
    shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
    heats = heats.reshape(shape)
    weights = weights.reshape(shape)

    # Scale rois and prepend the batch index column
    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1)
    )
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights


def finalize_keypoint_minibatch(blobs, valid):
    """Run the keypoint-specific checks on a fully collated minibatch.

    The minibatch stays valid only if it was valid so far, has at least one
    keypoint weight entry, and the sum of the keypoint weights exceeds
    cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH. As a side effect the
    'keypoint_loss_normalizer' blob is written into `blobs`.
    """
    keypoint_weights = blobs['keypoint_weights']
    num_visible_keypoints = np.sum(keypoint_weights)
    valid = (
        valid and len(keypoint_weights) > 0 and
        num_visible_keypoints > cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH
    )

    # Normalizer to use if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.
    # See modeling.model_builder.add_keypoint_losses
    max_keypoint_targets = (
        cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM *
        cfg.TRAIN.FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS
    )
    blobs['keypoint_loss_normalizer'] = np.array(
        num_visible_keypoints / max_keypoint_targets, dtype=np.float32
    )
    return valid


def _within_box(points, boxes):
    """Validate which keypoints are contained inside a given box.

    points: Nx2xK
    boxes: Nx4
    output: NxK
    """
    x_within = np.logical_and(
        points[:, 0, :] >= np.expand_dims(boxes[:, 0], axis=1),
        points[:, 0, :] <= np.expand_dims(boxes[:, 2], axis=1)
    )
    y_within = np.logical_and(
        points[:, 1, :] >= np.expand_dims(boxes[:, 1], axis=1),
        points[:, 1, :] <= np.expand_dims(boxes[:, 3], axis=1)
    )
    return np.logical_and(x_within, y_within)


================================================
FILE: detectron/roi_data/loader.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Detectron data loader. The design is generic and abstracted away from any
details of the minibatch. A minibatch is a dictionary of blob name keys and
their associated numpy (float32 or int32) ndarray values.

Outline of the data loader design:

loader thread\
loader thread \                    / GPU 1 enqueue thread -> feed -> EnqueueOp
...           -> minibatch queue ->  ...
loader thread /                    \ GPU N enqueue thread -> feed -> EnqueueOp
loader thread/

<---------------------------- CPU -----------------------------|---- GPU ---->

A pool of loader threads construct minibatches that are put onto the shared
minibatch queue. Each GPU has an enqueue thread that pulls a minibatch off the
minibatch queue, feeds the minibatch blobs into the workspace, and then runs
an EnqueueBlobsOp to place the minibatch blobs into the GPU's blobs queue.
During each fprop the first thing the network does is run a DequeueBlobsOp
in order to populate the workspace with the blobs from a queued minibatch.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import deque
from collections import OrderedDict
import logging
import numpy as np
import signal
import threading
import time
import uuid
from six.moves import queue as Queue

from caffe2.python import core, workspace

from detectron.core.config import cfg
from detectron.roi_data.minibatch import get_minibatch
from detectron.roi_data.minibatch import get_minibatch_blob_names
from detectron.utils.coordinator import coordinated_get
from detectron.utils.coordinator import coordinated_put
from detectron.utils.coordinator import Coordinator
import detectron.utils.c2 as c2_utils

logger = logging.getLogger(__name__)


class RoIDataLoader(object):
    """Threaded loader that feeds training minibatches to per-GPU queues.

    A pool of loader threads builds minibatches from the roidb and puts them
    on a shared, bounded queue in host memory. One enqueue thread per GPU
    takes minibatches off that queue, feeds their blobs into the workspace
    and runs an enqueue op to place them on that GPU's blobs queue.

    Threads are created by the constructor but only begin running once
    start() is called; shutdown() stops and joins them.
    """

    def __init__(
        self,
        roidb,
        num_loaders=4,
        minibatch_queue_size=64,
        blobs_queue_capacity=8
    ):
        """Set up queues, the roidb permutation and the (unstarted) threads.

        Arguments:
            roidb: sequence of roidb entries to draw minibatches from
            num_loaders: number of minibatch loader threads
            minibatch_queue_size: capacity of the host-side minibatch queue
            blobs_queue_capacity: capacity of each per-GPU blobs queue
        """
        self._roidb = roidb
        self._lock = threading.Lock()
        self._perm = deque(range(len(self._roidb)))
        self._cur = 0  # _perm cursor
        # The minibatch queue holds prepared training data in host (CPU) memory
        # When training with N > 1 GPUs, each element in the minibatch queue
        # is actually a partial minibatch which contributes 1 / N of the
        # examples to the overall minibatch
        self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
        self._blobs_queue_capacity = blobs_queue_capacity
        # Random queue name in case one instantiates multiple RoIDataLoaders
        self._loader_id = uuid.uuid4()
        self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
        # Loader threads construct (partial) minibatches and put them on the
        # minibatch queue
        self._num_loaders = num_loaders
        self._num_gpus = cfg.NUM_GPUS
        self.coordinator = Coordinator()

        self._output_names = get_minibatch_blob_names()
        self._shuffle_roidb_inds()
        self.create_threads()

    def minibatch_loader_thread(self):
        """Load mini-batches and put them onto the mini-batch queue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                blobs = self.get_next_minibatch()
                # Blobs must be queued in the order specified by
                # self.get_output_names
                ordered_blobs = OrderedDict()
                for key in self.get_output_names():
                    assert blobs[key].dtype in (np.int32, np.float32), \
                        'Blob {} of dtype {} must have dtype of ' \
                        'np.int32 or np.float32'.format(key, blobs[key].dtype)
                    ordered_blobs[key] = blobs[key]
                coordinated_put(
                    self.coordinator, self._minibatch_queue, ordered_blobs
                )
        logger.info('Stopping mini-batch loading thread')

    def enqueue_blobs_thread(self, gpu_id, blob_names):
        """Transfer mini-batches from a mini-batch queue to a BlobsQueue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                # qsize must be *called*: comparing the bound method itself
                # to 0 is always False, so the warning could never be logged
                if self._minibatch_queue.qsize() == 0:
                    logger.warning('Mini-batch queue is empty')
                blobs = coordinated_get(self.coordinator, self._minibatch_queue)
                self.enqueue_blobs(gpu_id, blob_names, blobs.values())
                logger.debug(
                    'batch queue size {}'.format(self._minibatch_queue.qsize())
                )
            logger.info('Stopping enqueue thread')

    def get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch. Thread safe.

        Keeps drawing fresh roidb indices until get_minibatch reports a valid
        minibatch.
        """
        valid = False
        while not valid:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            blobs, valid = get_minibatch(minibatch_db)
        return blobs

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb. Not thread safe.

        With cfg.TRAIN.ASPECT_GROUPING, images are split into a group with
        width >= height and a group with width < height, each group is
        shuffled and truncated to a multiple of cfg.TRAIN.IMS_PER_BATCH, and
        the resulting batches (rows) are shuffled, so every batch contains
        images from a single group.
        """
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]

            horz_inds = np.random.permutation(horz_inds)
            vert_inds = np.random.permutation(vert_inds)
            mb = cfg.TRAIN.IMS_PER_BATCH
            # Drop the leftover images that do not fill a whole batch
            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
            inds = np.hstack((horz_inds, vert_inds))

            # One row per batch; shuffle the rows, then flatten again
            inds = np.reshape(inds, (-1, mb))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1, ))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._perm = deque(self._perm)
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch. Thread safe."""
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]
            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
            self._cur += cfg.TRAIN.IMS_PER_BATCH
            if self._cur >= len(self._perm):
                self._shuffle_roidb_inds()
        return db_inds

    def get_output_names(self):
        """Return the minibatch blob names, in the order they are queued."""
        return self._output_names

    def enqueue_blobs(self, gpu_id, blob_names, blobs):
        """Put a mini-batch on a BlobsQueue.

        `blob_names` are unscoped; they are prefixed with 'gpu_<gpu_id>/'
        here. `blobs` must yield the blob values in the same order.
        """
        assert len(blob_names) == len(blobs)
        t = time.time()
        dev = c2_utils.CudaDevice(gpu_id)
        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
        for (blob_name, blob) in zip(blob_names, blobs):
            workspace.FeedBlob(blob_name, blob, device_option=dev)
        logger.debug(
            'enqueue_blobs {}: workspace.FeedBlob: {}'.
            format(gpu_id, time.time() - t)
        )
        t = time.time()
        op = core.CreateOperator(
            'SafeEnqueueBlobs', [queue_name] + blob_names,
            blob_names + [queue_name + '_enqueue_status'],
            device_option=dev
        )
        workspace.RunOperatorOnce(op)
        logger.debug(
            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
            format(gpu_id, time.time() - t)
        )

    def create_threads(self):
        """Create (but do not start) the loader and enqueuer threads."""
        # Create mini-batch loader threads, each of which builds mini-batches
        # and places them into a queue in CPU memory
        self._workers = [
            threading.Thread(target=self.minibatch_loader_thread)
            for _ in range(self._num_loaders)
        ]

        # Create one BlobsQueue per GPU
        # (enqueue_blob_names are unscoped)
        enqueue_blob_names = self.create_blobs_queues()

        # Create one enqueuer thread per GPU
        self._enqueuers = [
            threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names)
            ) for gpu_id in range(self._num_gpus)
        ]

    def start(self, prefill=False):
        """Start all threads as daemons.

        If `prefill` is True, block (polling every 0.1s) until the minibatch
        queue is full. If the coordinator reports a stop while waiting, the
        loader is shut down and the wait is abandoned.
        """
        for w in self._workers + self._enqueuers:
            # Attribute form of the deprecated Thread.setDaemon(True)
            w.daemon = True
            w.start()
        if prefill:
            logger.info('Pre-filling mini-batch queue...')
            while not self._minibatch_queue.full():
                logger.info(
                    '  [{:d}/{:d}]'.format(
                        self._minibatch_queue.qsize(),
                        self._minibatch_queue.maxsize
                    )
                )
                time.sleep(0.1)
                # Detect failure and shutdown
                if self.coordinator.should_stop():
                    self.shutdown()
                    break

    def has_stopped(self):
        """Return whether the coordinator has been asked to stop."""
        return self.coordinator.should_stop()

    def shutdown(self):
        """Request a stop, close the blobs queues and join all threads."""
        self.coordinator.request_stop()
        self.coordinator.wait_for_stop()
        self.close_blobs_queues()
        for w in self._workers + self._enqueuers:
            w.join()

    def create_blobs_queues(self):
        """Create one BlobsQueue for each GPU to hold mini-batches.

        Returns the unscoped names of the enqueue blobs.
        """
        for gpu_id in range(self._num_gpus):
            with c2_utils.GpuNameScope(gpu_id):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CreateBlobsQueue', [], [self._blobs_queue_name],
                        num_blobs=len(self.get_output_names()),
                        capacity=self._blobs_queue_capacity
                    )
                )
        return self.create_enqueue_blobs()

    def close_blobs_queues(self):
        """Close the BlobsQueue of every GPU."""
        for gpu_id in range(self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CloseBlobsQueue', [self._blobs_queue_name], []
                    )
                )

    def create_enqueue_blobs(self):
        """Create the per-GPU enqueue blobs and return their unscoped names.

        The names are the output blob names suffixed with this loader's id,
        which keeps the blobs of different RoIDataLoader instances apart.
        """
        blob_names = self.get_output_names()
        enqueue_blob_names = [
            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names
        ]
        for gpu_id in range(self._num_gpus):
            with c2_utils.NamedCudaScope(gpu_id):
                for blob in enqueue_blob_names:
                    workspace.CreateBlob(core.ScopedName(blob))
        return enqueue_blob_names

    def register_sigint_handler(self):
        """Install a SIGINT handler that shuts this loader down."""
        # The handler's first parameter is deliberately not called `signal`,
        # so that it does not shadow the signal module
        def signal_handler(signum, frame):
            logger.info(
                'SIGINT: Shutting down RoIDataLoader threads and exiting...'
            )
            self.shutdown()

        signal.signal(signal.SIGINT, signal_handler)


================================================
FILE: detectron/roi_data/mask_rcnn.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Construct minibatches for Mask R-CNN training. Handles the minibatch blobs
that are specific to Mask R-CNN. Other blobs that are generic to RPN or
Fast/er R-CNN are handled by their respective roi_data modules.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np

from detectron.core.config import cfg
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils
import detectron.utils.segms as segm_utils

logger = logging.getLogger(__name__)


def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    Args:
        blobs: blob dictionary. blobs['labels_int32'] is read; 'mask_rois',
            'roi_has_mask_int32' and 'masks_int32' are written.
        sampled_boxes: boxes of the sampled rois, indexed like
            blobs['labels_int32']. It is left unmodified.
        roidb: roidb entry providing 'gt_classes', 'is_crowd' and 'segms'.
        im_scale: factor by which the mask roi coordinates are multiplied.
        batch_idx: value written to the first column of 'mask_rois'.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    # 1 for every roi with a fg label, 0 otherwise
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation.
        # Indexing with an index array yields a copy, so scaling rois_fg
        # in place further down does not touch sampled_boxes.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi here.
        # Copy it: indexing a single row returns a view, and the in-place
        # scaling below would otherwise also rescale the caller's
        # sampled_boxes.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks


def _expand_to_class_specific_mask_targets(masks, mask_class_labels):
    """Build class specific mask targets from class agnostic ones.

    The input has shape (#masks, M ** 2) and the output has shape
    (#masks, #classes * M ** 2), where M is cfg.MRCNN.RESOLUTION. Row i of the
    output holds masks[i] in the slot of its class label and -1 everywhere
    else.
    """
    num_masks = masks.shape[0]
    assert num_masks == mask_class_labels.shape[0]
    mask_size = cfg.MRCNN.RESOLUTION**2

    # Every target starts out as -1, the "don't care" / ignore label
    mask_targets = -blob_utils.ones(
        (num_masks, cfg.MODEL.NUM_CLASSES * mask_size), int32=True
    )

    for row in range(num_masks):
        cls = int(mask_class_labels[row])
        if cls <= 0:
            # Background instance (only happens when an image has no fg
            # samples): its row stays entirely ignored.
            continue
        offset = mask_size * cls
        mask_targets[row, offset:offset + mask_size] = masks[row, :]

    return mask_targets


================================================
FILE: detectron/roi_data/minibatch.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Construct minibatches for Detectron networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import logging
import numpy as np

from detectron.core.config import cfg
import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data
import detectron.roi_data.retinanet as retinanet_roi_data
import detectron.roi_data.rpn as rpn_roi_data
import detectron.utils.blob as blob_utils

logger = logging.getLogger(__name__)


def get_minibatch_blob_names(is_training=True):
    """Return the minibatch blob names, in the order the data loader reads
    them: 'data' first, followed by the blob names of the configured model
    type.
    """
    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster R-CNN
        model_blob_names = rpn_roi_data.get_rpn_blob_names(
            is_training=is_training
        )
    elif cfg.RETINANET.RETINANET_ON:
        model_blob_names = retinanet_roi_data.get_retinanet_blob_names(
            is_training=is_training
        )
    else:
        # Fast R-CNN like models trained on precomputed proposals
        model_blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names(
            is_training=is_training
        )

    # data blob: holds a batch of N images, each with 3 channels
    blob_names = ['data']
    blob_names.extend(model_blob_names)
    return blob_names


def get_minibatch(roidb):
    """Construct a minibatch from the given roidb.

    Returns:
        (blobs, valid): the blob dictionary and the validity flag reported by
        the model specific blob builder.
    """
    # Per-image blobs are gathered in lists and concatenated into a single
    # tensor later on, so every blob starts out as an empty list
    blobs = dict((name, []) for name in get_minibatch_blob_names())

    # Input image blob, formatted for caffe2
    im_blob, im_scales = _get_image_blob(roidb)
    blobs['data'] = im_blob

    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster/Mask R-CNN
        valid = rpn_roi_data.add_rpn_blobs(blobs, im_scales, roidb)
        return blobs, valid

    if cfg.RETINANET.RETINANET_ON:
        # Width and height of the network input, i.e. of the (possibly
        # padded) image blob. They are passed along so the data can be sliced
        # accordingly, which avoids the need for SampleAsOp
        im_height = im_blob.shape[2]
        im_width = im_blob.shape[3]
        valid = retinanet_roi_data.add_retinanet_blobs(
            blobs, im_scales, roidb, im_width, im_height
        )
        return blobs, valid

    # Fast R-CNN like models trained on precomputed proposals
    valid = fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)
    return blobs, valid


def _get_image_blob(roidb):
    """Read the images listed in the roidb and pack them into an input blob.

    Each image is prepared at a scale drawn at random from cfg.TRAIN.SCALES.

    Returns:
        (blob, im_scales): the image blob and the list of scale factors
        returned by blob_utils.prep_im_for_blob, one per image.
    """
    num_images = len(roidb)
    # One randomly chosen entry of cfg.TRAIN.SCALES per image
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images
    )

    processed_ims, im_scales = [], []
    for entry, scale_ind in zip(roidb, scale_inds):
        image_path = entry['image']
        im = cv2.imread(image_path)
        assert im is not None, \
            'Failed to read image \'{}\''.format(image_path)
        if entry['flipped']:
            # Reverse the second axis of the image array
            im = im[:, ::-1, :]
        im, im_scale = blob_utils.prep_im_for_blob(
            im,
            cfg.PIXEL_MEANS,
            cfg.TRAIN.SCALES[scale_ind],
            cfg.TRAIN.MAX_SIZE
        )
        processed_ims.append(im)
        im_scales.append(im_scale)

    # Pack the processed images into a single blob
    return blob_utils.im_list_to_blob(processed_ims), im_scales


================================================
FILE: detectron/roi_data/retinanet.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Compute minibatch blobs for training a RetinaNet network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import logging

import detectron.utils.boxes as box_utils
import detectron.roi_data.data_utils as data_utils
from detectron.core.config import cfg


logger = logging.getLogger(__name__)


def get_retinanet_blob_names(is_training=True):
    """Return the RetinaNet blob names in the order the data loader reads them.

    Notation:
        N = number of images per minibatch
        A = number of anchors = num_scales * num_aspect_ratios
            (for example 9 in the RetinaNet paper)
        H, W = spatial dimensions (different for each FPN level)
        M = number of positive anchors, i.e. the anchors selected by the
            positive/negative IoU overlap thresholds that the bounding box
            branch regresses on

    Per FPN level training blobs:
        retnet_cls_labels       -> labels for the cls branch.
                                   Shape: N x A x H x W
        retnet_roi_bbox_targets -> targets for the bbox regression branch.
                                   Shape: M x 4
        retnet_roi_fg_bbox_locs -> locations of the M fg boxes inside the
                                   network's bbox prediction, which has shape
                                   N x (A * 4) x H x W in the non class
                                   specific case. Shape: M x 4, each row is
                                   [img_id, anchor_id, x_loc, y_loc]
    """
    # im_info: (height, width, image scale)
    blob_names = ['im_info']
    assert cfg.FPN.FPN_ON, "RetinaNet uses FPN for dense detection"
    if not is_training:
        return blob_names

    blob_names.extend(['retnet_fg_num', 'retnet_bg_num'])
    # Same format as RPN blobs, but one set per FPN level
    per_level_prefixes = (
        'retnet_cls_labels_',
        'retnet_roi_bbox_targets_',
        'retnet_roi_fg_bbox_locs_',
    )
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        suffix = 'fpn{}'.format(lvl)
        blob_names.extend(prefix + suffix for prefix in per_level_prefixes)
    return blob_names


def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    """Fill `blobs` with the RetinaNet training blobs for the given roidb.

    Args:
        blobs: blob dictionary, updated in place.
        im_scales: scale factor of each image in `roidb`.
        roidb: roidb entries of the minibatch; every entry must have at least
            one non-crowd gt box.
        image_width, image_height: width and height forwarded to
            _get_retinanet_blobs.

    Returns:
        Always True.
    """
    # RetinaNet is applied to many feature levels, as in the FPN paper
    min_level, max_level = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    num_aspect_ratios = len(aspect_ratios)
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # One field of anchors per (level, octave scale, aspect ratio)
    foas = []
    for lvl in range(min_level, max_level + 1):
        stride = 2. ** lvl
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for idx, ratio in enumerate(aspect_ratios):
                foas.append(
                    data_utils.get_field_of_anchors(
                        stride,
                        (stride * octave_scale * anchor_scale, ),
                        (ratio, ),
                        octave,
                        idx
                    )
                )
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    # loc_stride: 4 coordinates per bbox prediction, multiplied by the number
    # of non-background classes when the bbox prediction is class specific
    loc_stride = 4
    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth empty for image is not allowed. Please check.'

        gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]

        blobs['im_info'].append(
            np.array([[im_height, im_width, scale]], dtype=np.float32)
        )

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height)
        # The per-level lists end up stacked as:
        # [[anchors for image 1] + [anchors for image 2]]
        for foa, foa_blobs in zip(foas, retinanet_blobs):
            level = int(np.log2(foa.stride))
            for name, value in foa_blobs.items():
                if name == 'retnet_roi_fg_bbox_locs':
                    value[:, 0] = im_i
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # Column 1 arrives holding the zero-based class label in
                    # the class specific case and 0 otherwise. Scale it to
                    # the class's offset (label * 4), then add the offset of
                    # this anchor.
                    value[:, 1] *= 4
                    value[:, 1] += loc_stride * anchor_ind
                blobs['{}_fpn{}'.format(name, level)].append(value)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if not isinstance(v, list) or len(v) == 0:
            continue
        # Number of list elements contributed by each image (the number of
        # anchors for the per-level blobs)
        A = int(len(v) / N)
        if 'retnet_cls_labels' in k:
            # The list holds N * A elements of shape 1 x 1 x H x W, ordered
            # image by image. The network predicts N x (A * #classes) x H x W,
            # so the anchors of each image are first concatenated along axis
            # 1 and the images are then concatenated along axis 0, giving
            # N x A x H x W.
            per_image = [
                np.concatenate(v[i: i + A], axis=1)
                for i in range(0, len(v), A)
            ]
            blobs[k] = np.concatenate(per_image, axis=0)
        else:
            # Everything else (bbox targets and fg box locations, each
            # element of shape M x 4 with M the number of fg locations) is
            # simply stacked along axis 0.
            blobs[k] = np.concatenate(v, axis=0)
    return True


def _get_retinanet_blobs(
        foas, all_anchors, gt_boxes, gt_classes, im_width, im_height):
    """Compute the RetinaNet training blobs of one image.

    Args:
        foas: fields of anchors. `field_size` and `stride` are read from each
            one, and each one accounts for field_size ** 2 consecutive rows of
            `all_anchors`.
        all_anchors: anchors of every entry of `foas`, concatenated in order.
        gt_boxes: ground-truth boxes, one row per box. May be empty, in which
            case no anchor is labeled foreground.
        gt_classes: class label of each row of `gt_boxes`.
        im_width, im_height: size used to crop the label maps; every map keeps
            its first int(im_height / stride) rows and int(im_width / stride)
            columns.

    Returns:
        (blobs_out, out_num_fg, out_num_bg). blobs_out is a list with one dict
        per entry of `foas`, holding 'retnet_cls_labels',
        'retnet_roi_bbox_targets' and 'retnet_roi_fg_bbox_locs'. The other two
        are one-element arrays derived from the fg and bg anchor counts.
    """
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    # Every anchor is kept, so the "inside" set is the full set of anchors
    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label >= 1 is the class of the matched gt box (positive), 0 is
    # negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    # Defaults used when there are no gt boxes: every anchor has zero overlap.
    # Without them the code below would fail on unbound local variables.
    anchor_to_gt_argmax = np.zeros((num_inside, ), dtype=np.int64)
    anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[
            np.arange(num_inside), anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if num_fg > 0:
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, 1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4, height, width)
        _bbox_targets = _bbox_targets.reshape((1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        inds_4d = np.where(_labels > 0)
        # np.where returns one index array per dimension, so the number of
        # positive locations is the length of any one of them, not of the
        # tuple itself
        M = len(inds_4d[0])
        _roi_bbox_targets = np.zeros((0, 4))
        _roi_fg_bbox_locs = np.zeros((0, 4))
        if M > 0:
            im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
            _roi_bbox_targets = np.zeros((len(im_inds), 4))
            _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
            lbls = _labels[im_inds, :, y, x]
            for i, lbl in enumerate(lbls):
                # Zero-based class index; forced to 0 when the bbox
                # regression is not class specific
                l = lbl[0] - 1
                if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                    l = 0
                assert l >= 0 and l < num_classes, 'label out of the range'
                _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
                # Column 0 is a placeholder that the caller overwrites with
                # the image index
                _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]])
        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))
    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (
        np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
        out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg


================================================
FILE: detectron/roi_data/rpn.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Minibatch construction for Region Proposal Networks (RPN)."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np
import numpy.random as npr

from detectron.core.config import cfg
import detectron.roi_data.data_utils as data_utils
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils

logger = logging.getLogger(__name__)


def get_rpn_blob_names(is_training=True):
    """Return the names of the blobs used by RPN.

    'im_info' is always present. For training, 'roidb' and the RPN target
    blobs are added; with a multilevel FPN RPN there is one set of target
    blobs per FPN level, each name carrying a '_fpn<level>' suffix.
    """
    # im_info: (height, width, image scale)
    blob_names = ['im_info']
    if not is_training:
        return blob_names

    blob_names.append('roidb')
    rpn_target_names = [
        'rpn_labels_int32_wide',
        'rpn_bbox_targets_wide',
        'rpn_bbox_inside_weights_wide',
        'rpn_bbox_outside_weights_wide',
    ]
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # Same format as the single level RPN blobs, but one set per FPN level
        for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
            level_suffix = '_fpn' + str(lvl)
            blob_names.extend(
                name + level_suffix for name in rpn_target_names
            )
    else:
        # Single level RPN blobs
        blob_names.extend(rpn_target_names)
    return blob_names


def add_rpn_blobs(blobs, im_scales, roidb):
    """Add the blobs needed for training RPN-only and end-to-end Faster R-CNN
    models.

    Args:
        blobs: blob dictionary, updated in place.
        im_scales: scale factor of each image in `roidb`.
        roidb: roidb entries of the minibatch.

    Returns:
        Always True, since RPN minibatches are valid by design.
    """
    multilevel = cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN
    if multilevel:
        # RPN applied to many feature levels, as in the FPN paper
        k_min = cfg.FPN.RPN_MIN_LEVEL
        k_max = cfg.FPN.RPN_MAX_LEVEL
        levels = list(range(k_min, k_max + 1))
        foas = [
            data_utils.get_field_of_anchors(
                2.**lvl,
                (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                cfg.FPN.RPN_ASPECT_RATIOS
            )
            for lvl in levels
        ]
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        # Classical RPN, applied to a single feature level
        levels = []
        foas = [
            data_utils.get_field_of_anchors(
                cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
            )
        ]
        all_anchors = foas[0].field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        # Only non-crowd boxes with a foreground class are used as gt
        is_gt = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        gt_rois = entry['boxes'][np.where(is_gt)[0], :] * scale
        blobs['im_info'].append(
            np.array([[im_height, im_width, scale]], dtype=np.float32)
        )

        # Add RPN targets
        rpn_blobs = _get_rpn_blobs(
            im_height, im_width, foas, all_anchors, gt_rois
        )
        if multilevel:
            # One dict of blobs per level; store each under a per-level key
            for i, lvl in enumerate(levels):
                level_suffix = '_fpn' + str(lvl)
                for k, v in rpn_blobs[i].items():
                    blobs[k + level_suffix].append(v)
        else:
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    # Merge the per-image lists into single arrays
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    # Keep only a fixed subset of the roidb fields in the serialized 'roidb'
    # blob
    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [
        {k: entry[k] for k in valid_keys if k in entry} for entry in roidb
    ]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True


def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    """Compute the RPN training labels and bbox regression blobs of one image.

    Args:
        im_height, im_width: image size used to discard anchors that straddle
            the image boundary (see TRAIN.RPN_STRADDLE_THRESH).
        foas: list of fields of anchors. `field_size` and `num_cell_anchors`
            are read from each one to split the per-anchor arrays.
        all_anchors: anchors of every entry of `foas`, concatenated in order.
        gt_boxes: ground-truth boxes, one row per box. May be empty, in which
            case no anchor is labeled foreground.

    Returns:
        A dict with the keys 'rpn_labels_int32_wide', 'rpn_bbox_targets_wide',
        'rpn_bbox_inside_weights_wide' and 'rpn_bbox_outside_weights_wide'
        when `foas` has a single entry, otherwise a list with one such dict
        per entry of `foas`.
    """
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh)
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)
    # Defaults used when there are no gt boxes: every anchor has zero overlap.
    # Without them the code below would fail on unbound local variables.
    anchor_to_gt_argmax = np.zeros((num_inside, ), dtype=np.int64)
    anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])
        ]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max
        )[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IOU
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False
        )
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # subsample negative labels if we have too many
    # (samples with replacement, but since the set of bg inds is large most
    # samples will not have repeats)
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
    else:
        enable_inds = bg_inds

    labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    # There are no regression targets to compute without fg anchors (which is
    # always the case when there are no gt boxes)
    if len(fg_inds) > 0:
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]
        )

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is only trained on positive examples so we set their
    # weights to 1.0 (or otherwise if config is different) and 0 otherwise
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0
    )
    bbox_inside_weights = data_utils.unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0
    )
    bbox_outside_weights = data_utils.unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0
    )

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(
                rpn_labels_int32_wide=_labels,
                rpn_bbox_targets_wide=_bbox_targets,
                rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                rpn_bbox_outside_weights_wide=_bbox_outside_weights
            )
        )
    # A single field of anchors is returned as a bare dict, not a list
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out


================================================
FILE: detectron/tests/data_loader_benchmark.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Example usage:
# data_loader_benchmark.par \
#   NUM_GPUS 2 \
#   TRAIN.DATASETS "('voc_2007_trainval',)" \
#   TRAIN.PROPOSAL_FILES /path/to/voc_2007_trainval/proposals.pkl \
#   DATA_LOADER.NUM_THREADS 4 \
#   DATA_LOADER.MINIBATCH_QUEUE_SIZE 64 \
#   DATA_LOADER.BLOBS_QUEUE_CAPACITY 8

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import logging
import numpy as np
import pprint
import sys
import time

from caffe2.python import core
from caffe2.python import muji
from caffe2.python import workspace

from detectron.core.config import assert_and_infer_cfg
from detectron.core.config import cfg
from detectron.core.config import merge_cfg_from_file
from detectron.core.config import merge_cfg_from_list
from detectron.datasets.roidb import combined_roidb_for_training
from detectron.roi_data.loader import RoIDataLoader
from detectron.utils.logging import setup_logging
from detectron.utils.timer import Timer


def parse_args():
    """Parse the benchmark's command-line arguments from ``sys.argv``.

    When the script is started without any argument the help text is printed
    and the process exits with status 1. Everything left over after the
    recognized flags is collected in ``opts``.

    Returns:
        argparse.Namespace with ``num_batches``, ``sleep_time``, ``cfg_file``,
        ``x_factor``, ``profiler`` and ``opts``.
    """
    # (flags, keyword arguments) pairs, registered in this exact order so the
    # generated help output keeps the same layout.
    option_table = (
        (('--num-batches',), dict(
            dest='num_batches', help='Number of minibatches to run',
            default=200, type=int)),
        (('--sleep',), dict(
            dest='sleep_time',
            help='Seconds sleep to emulate a network running',
            default=0.1, type=float)),
        (('--cfg',), dict(
            dest='cfg_file', help='optional config file', default=None,
            type=str)),
        (('--x-factor',), dict(
            dest='x_factor', help='simulates x-factor more GPUs',
            default=1, type=int)),
        (('--profiler',), dict(
            dest='profiler', help='profile minibatch load time',
            action='store_true')),
        (('opts',), dict(
            help='See detectron/core/config.py for all options',
            default=None, nargs=argparse.REMAINDER)),
    )

    arg_parser = argparse.ArgumentParser()
    for flags, settings in option_table:
        arg_parser.add_argument(*flags, **settings)

    # A bare invocation is treated as a request for usage information.
    if len(sys.argv) == 1:
        arg_parser.print_help()
        sys.exit(1)
    return arg_parser.parse_args()


def loader_loop(roi_data_loader):
    """Time 100 direct calls to ``roi_data_loader.get_next_minibatch()``.

    After every call the running average reported by the timer is printed.

    Args:
        roi_data_loader: object exposing ``get_next_minibatch()``.
    """
    num_iters = 100
    timer = Timer()
    report = '{:d}/{:d}: Average get_next_minibatch time: {:.3f}s'
    for step in range(1, num_iters + 1):
        # Only the minibatch construction itself is inside the timed region.
        timer.tic()
        roi_data_loader.get_next_minibatch()
        timer.toc()
        print(report.format(step, num_iters, timer.average_time))


def main(opts):
    """Benchmark the RoI data loader.

    The benchmark has two phases:
      1. ``loader_loop`` times ``get_next_minibatch`` directly (optionally
         under cProfile when ``opts.profiler`` is set).
      2. The loader is started and a net containing one ``DequeueBlobs`` op
         per GPU is run ``opts.num_batches`` times; the running average
         dequeue time and the minibatch queue fill level are logged.

    Args:
        opts: namespace returned by ``parse_args``; this function reads
            ``profiler``, ``num_batches``, ``x_factor`` and ``sleep_time``.
    """
    logger = logging.getLogger(__name__)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    logger.info('{:d} roidb entries'.format(len(roidb)))
    roi_data_loader = RoIDataLoader(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )
    blob_names = roi_data_loader.get_output_names()

    # Build a net that dequeues one set of blobs per GPU.
    net = core.Net('dequeue_net')
    net.type = 'dag'
    all_blobs = []
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for blob_name in blob_names:
                    blob = core.ScopedName(blob_name)
                    all_blobs.append(blob)
                    workspace.CreateBlob(blob)
                    logger.info('Creating blob: {}'.format(blob))
                net.DequeueBlobs(
                    roi_data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))

    # Phase 1: time minibatch construction on its own.
    if opts.profiler:
        import cProfile
        cProfile.runctx(
            'loader_loop(roi_data_loader)', globals(), locals(),
            sort='cumulative')
    else:
        loader_loop(roi_data_loader)

    # Phase 2: time dequeuing from the running loader.
    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)
    total_time = 0
    try:
        for i in range(opts.num_batches):
            start_t = time.time()
            for _ in range(opts.x_factor):
                workspace.RunNetOnce(net)
            # Normalize by x_factor so the figure is per simulated dequeue.
            total_time += (time.time() - start_t) / opts.x_factor
            logger.info(
                '{:d}/{:d}: Average dequeue time: {:.3f}s  [{:d}/{:d}]'.format(
                    i + 1, opts.num_batches, total_time / (i + 1),
                    roi_data_loader._minibatch_queue.qsize(),
                    cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE
                )
            )
            # Sleep to simulate the time taken by running a little network
            time.sleep(opts.sleep_time)
            # To inspect:
            # blobs = workspace.FetchBlobs(all_blobs)
            # from IPython import embed; embed()
    finally:
        # Always stop the loader, even when the benchmark loop raises, so
        # the loader started above is not left running.
        logger.info('Shutting down data loader...')
        roi_data_loader.shutdown()


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    logger = setup_logging(__name__)
    logger.setLevel(logging.DEBUG)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    # Apply the optional config file first, then the command-line overrides.
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    # Seed only after the config has been merged so that an RNG_SEED supplied
    # through --cfg or the trailing opts is the one actually used.
    np.random.seed(cfg.RNG_SEED)
    logger.info('Running with config:')
    logger.info(pprint.pformat(cfg))
    main(args)


================================================
FILE: detectron/tests/test_batch_permutation_op.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import unittest

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import gradient_checker
from caffe2.python import workspace

import detectron.utils.logging as logging_utils
import detectron.utils.c2 as c2_utils


class BatchPermutationOpTest(unittest.TestCase):
    """Tests for the custom 'BatchPermutation' operator on CUDA device 0."""

    def _run_op_test(self, X, I, check_grad=False):
        """Run BatchPermutation on (X, I) and compare the output to ``X[I]``.

        When ``check_grad`` is true, a numerical gradient check with respect
        to the first input is performed before the forward comparison.
        """
        cuda_device = core.DeviceOption(caffe2_pb2.CUDA, 0)
        with core.DeviceScope(cuda_device):
            perm_op = core.CreateOperator(
                'BatchPermutation', ['X', 'I'], ['Y'])
            workspace.FeedBlob('X', X)
            workspace.FeedBlob('I', I)
        workspace.RunOperatorOnce(perm_op)
        output = workspace.FetchBlob('Y')

        if check_grad:
            checker = gradient_checker.GradientChecker(
                stepsize=0.1,
                threshold=0.001,
                device_option=cuda_device
            )
            passed, _grad, _grad_estimated = checker.CheckSimple(
                perm_op, [X, I], 0, [0])
            self.assertTrue(passed, 'Grad check failed')

        # Reference result: plain numpy indexing along the first axis.
        expected = X[I]
        np.testing.assert_allclose(output, expected, rtol=1e-5, atol=1e-08)

    def _run_speed_test(self, iters=5, N=1024):
        """Example of benchmarking a custom operator with the Caffe2
        'prof_dag' network execution type.

        Note that 'prof_dag' only works when Caffe2 was compiled with
        profiling support, i.e. with the `-DUSE_PROF=ON` option passed to
        `cmake` when building Caffe2.
        """
        net = core.Net('test')
        proto = net.Proto()
        proto.type = 'prof_dag'
        proto.num_workers = 2
        permuted = net.BatchPermutation(['X', 'I'], 'Y')
        flattened = net.FlattenToVec([permuted], 'Y_flat')
        loss = net.AveragedLoss([flattened], 'loss')
        net.AddGradientOperators([loss])
        workspace.CreateNet(net)

        X = np.random.randn(N, 256, 14, 14)
        for _ in range(iters):
            order = np.random.permutation(N)
            workspace.FeedBlob('X', X.astype(np.float32))
            workspace.FeedBlob('I', order.astype(np.int32))
            workspace.RunNet(net.Proto().name)
            np.testing.assert_allclose(
                workspace.FetchBlob('Y'), X[order], rtol=1e-5, atol=1e-08
            )

    def test_forward_and_gradient(self):
        """Identity, swap and random permutations, each with a grad check."""
        # (input shape, factory producing the index vector). The index is
        # built lazily so random numbers are drawn in data-then-index order.
        cases = (
            ((2, 3, 5, 7), lambda: np.array([0, 1], dtype=np.int32)),
            ((2, 3, 5, 7), lambda: np.array([1, 0], dtype=np.int32)),
            ((10, 3, 5, 7),
             lambda: np.array(np.random.permutation(10), dtype=np.int32)),
        )
        for shape, make_indices in cases:
            data = np.random.randn(*shape).astype(np.float32)
            indices = make_indices()
            self._run_op_test(data, indices, check_grad=True)

    def test_size_exceptions(self):
        """An index vector longer than the batch must raise RuntimeError."""
        data = np.random.randn(2, 256, 42, 86).astype(np.float32)
        indices = np.array(np.random.permutation(10), dtype=np.int32)
        with self.assertRaises(RuntimeError):
            self._run_op_test(data, indices)

    # See doc string in _run_speed_test
    # def test_perf(self):
    #     with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
    #         self._run_speed_test()


if __name__ == '__main__':
    # Initialize Caffe2 before any operator is created or run.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    # Load the Detectron op library (presumably what registers the op under
    # test) and fail fast if 'BatchPermutation' is not available.
    c2_utils.import_detectron_ops()
    assert 'BatchPermutation' in workspace.RegisteredOperators()
    logging_utils.setup_logging(__name__)
    unittest.main()


================================================
FILE: detectron/tests/test_bbox_transform.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import unittest

from pycocotools import mask as COCOmask

import detectron.utils.boxes as box_utils


def random_boxes(mean_box, stdev, N):
    """Draw N random 4-vectors distributed around ``mean_box``.

    Each of the four coordinates is sampled independently as standard normal
    noise scaled by ``stdev`` and shifted by the matching entry of
    ``mean_box``.

    Returns:
        float32 array of shape (N, 4).
    """
    noise = np.random.randn(N, 4)
    jittered = noise * stdev + mean_box
    return jittered.astype(dtype=np.float32)


class TestBboxTransform(unittest.TestCase):
    """Consistency checks for the box utilities in detectron.utils.boxes."""

    def test_bbox_transform_and_inverse(self):
        """Deltas computed from (src, dst) must map src back onto dst."""
        weights = (5, 5, 10, 10)
        mean_box = [10, 10, 20, 20]
        source = random_boxes(mean_box, 1, 10)
        target = random_boxes(mean_box, 1, 10)
        deltas = box_utils.bbox_transform_inv(source, target, weights=weights)
        recovered = box_utils.bbox_transform(source, deltas, weights=weights)
        np.testing.assert_array_almost_equal(target, recovered, decimal=5)

    def test_bbox_dataset_to_prediction_roundtrip(self):
        """Simulate the process of reading a ground-truth box from a dataset,
        make predictions from proposals, convert the predictions back to the
        dataset format, and then use the COCO API to compute IoU overlap
        between the gt box and the predictions. These should have IoU of 1.
        """
        weights = (5, 5, 10, 10)
        # Step 1: a ground-truth box in the dataset's (x1, y1, w, h) format
        gt_xywh = [10, 20, 100, 150]
        # Step 2: the same box in the internal (x1, y1, x2, y2) format
        gt_xyxy = box_utils.xywh_to_xyxy(gt_xywh)
        # Step 3: proposals scattered around the ground truth
        proposals = random_boxes(gt_xyxy, 10, 10)
        # Step 4: regression deltas that take each proposal to the gt box
        deltas = box_utils.bbox_transform_inv(
            proposals, np.array([gt_xyxy]), weights=weights
        )
        # Step 5: apply the deltas to obtain xyxy predictions
        predictions_xyxy = box_utils.bbox_transform(
            proposals, deltas, weights=weights
        )
        # Step 6: back to the dataset's xywh format
        predictions_xywh = box_utils.xyxy_to_xywh(predictions_xyxy)
        # Step 7: IoU against the gt box through the COCO API
        crowd_flags = [int(False)] * predictions_xywh.shape[0]
        ious = COCOmask.iou(
            predictions_xywh, np.array([gt_xywh]), crowd_flags)
        np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))

    def test_cython_bbox_iou_against_coco_api_bbox_iou(self):
        """Check that our cython implementation of bounding box IoU overlap
        matches the COCO API implementation.
        """
        def _compare(boxes_a, boxes_b):
            # IoU from the cython implementation (xyxy input)
            ours = box_utils.bbox_overlaps(boxes_a, boxes_b)
            # IoU from the COCO API, which expects xywh boxes
            crowd_flags = [int(False)] * boxes_b.shape[0]
            theirs = COCOmask.iou(
                box_utils.xyxy_to_xywh(boxes_a),
                box_utils.xyxy_to_xywh(boxes_b),
                crowd_flags
            )
            # Both implementations should agree
            np.testing.assert_array_almost_equal(ours, theirs, decimal=5)

        # First small boxes, then bigger ones: (mean box, stdev)
        scenarios = (
            ([10, 10, 20, 20], 5),
            ([10, 10, 110, 20], 20),
        )
        for mean_box, stdev in scenarios:
            first = random_boxes(mean_box, stdev, 10)
            second = random_boxes(mean_box, stdev, 10)
            _compare(first, second)


if __name__ == '__main__':
    # Run every TestCase defined in this module when executed as a script.
    unittest.main()


================================================
FILE: detectron/tests/test_cfg.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import tempfile
import unittest

from detectron.core.config import cfg
from detectron.utils.collections import AttrDict
import detectron.core.config as core_config
import detectron.utils.env as envu
import detectron.utils.logging as logging_utils


class TestAttrDict(unittest.TestCase):
    """Tests for the immutability switch of AttrDict."""

    def test_immutability(self):
        """Check that immutable AttrDicts reject attribute assignment.

        Covers the top-level dict, nested AttrDicts, toggling the flag back
        off, and preservation of the flag through a YAML dump/load cycle.
        """
        # Top level immutable
        a = AttrDict()
        a.foo = 0
        a.immutable(True)
        # Each assignment gets its own context: inside a single assertRaises
        # block the first raise would prevent the second from ever running.
        with self.assertRaises(AttributeError):
            a.foo = 1
        with self.assertRaises(AttributeError):
            a.bar = 1
        assert a.is_immutable()
        assert a.foo == 0
        a.immutable(False)
        assert not a.is_immutable()
        a.foo = 1
        assert a.foo == 1

        # Recursively immutable
        a.level1 = AttrDict()
        a.level1.foo = 0
        a.level1.level2 = AttrDict()
        a.level1.level2.foo = 0
        a.immutable(True)
        assert a.is_immutable()
        with self.assertRaises(AttributeError):
            a.level1.level2.foo = 1
        with self.assertRaises(AttributeError):
            a.level1.bar = 1
        assert a.level1.level2.foo == 0

        # Serialize immutability state
        a.immutable(True)
        a2 = core_config.load_cfg(envu.yaml_dump(a))
        assert a.is_immutable()
        assert a2.is_immutable()


class TestCfg(unittest.TestCase):
    """Tests for copying and merging the global Detectron config.

    These tests operate on (and modify) the shared module-level ``cfg``.
    """

    def test_copy_cfg(self):
        """Editing a deep copy must leave the global cfg untouched."""
        original_type = cfg.MODEL.TYPE
        clone = copy.deepcopy(cfg)
        clone.MODEL.TYPE = 'dummy'
        assert cfg.MODEL.TYPE == original_type

    def test_merge_cfg_from_cfg(self):
        """Exercise merge_cfg_from_cfg with valid and invalid inputs."""
        # Case: merge from a deep copy of the global cfg
        marker = 'dummy0'
        other = copy.deepcopy(cfg)
        other.MODEL.TYPE = marker
        core_config.merge_cfg_from_cfg(other)
        assert cfg.MODEL.TYPE == marker

        # Case: merge from a cfg that went through a yaml dump/load cycle
        marker = 'dummy1'
        other = core_config.load_cfg(envu.yaml_dump(cfg))
        other.MODEL.TYPE = marker
        core_config.merge_cfg_from_cfg(other)
        assert cfg.MODEL.TYPE == marker

        # Case: merge a partial cfg that only holds a valid key
        marker = 'dummy2'
        other = AttrDict()
        other.MODEL = AttrDict()
        other.MODEL.TYPE = marker
        core_config.merge_cfg_from_cfg(other)
        assert cfg.MODEL.TYPE == marker

        # Case: an unknown key is rejected
        marker = 'dummy3'
        other = AttrDict()
        other.FOO = AttrDict()
        other.FOO.BAR = marker
        with self.assertRaises(KeyError):
            core_config.merge_cfg_from_cfg(other)

        # Case: a list value is converted to the tuple type of the target
        other = AttrDict()
        other.TRAIN = AttrDict()
        other.TRAIN.SCALES = [1]
        core_config.merge_cfg_from_cfg(other)
        assert type(cfg.TRAIN.SCALES) is tuple
        assert cfg.TRAIN.SCALES[0] == 1

        # Case: a value of an incompatible type is rejected
        other = AttrDict()
        other.TRAIN = AttrDict()
        other.TRAIN.SCALES = 1
        with self.assertRaises(ValueError):
            core_config.merge_cfg_from_cfg(other)

    def test_merge_cfg_from_file(self):
        """Merging a dumped cfg file restores a value changed afterwards."""
        with tempfile.NamedTemporaryFile() as dump_file:
            envu.yaml_dump(cfg, dump_file)
            saved_type = cfg.MODEL.TYPE
            cfg.MODEL.TYPE = 'dummy'
            assert cfg.MODEL.TYPE != saved_type
            core_config.merge_cfg_from_file(dump_file.name)
            assert cfg.MODEL.TYPE == saved_type

    def test_merge_cfg_from_list(self):
        """Key/value pairs given as a flat list override the cfg."""
        overrides = [
            'TRAIN.SCALES', '(100, )', 'MODEL.TYPE', u'foobar', 'NUM_GPUS', 2
        ]
        # Preconditions: none of the overrides is already in effect.
        assert len(cfg.TRAIN.SCALES) > 0
        assert cfg.TRAIN.SCALES[0] != 100
        assert cfg.MODEL.TYPE != 'foobar'
        assert cfg.NUM_GPUS != 2
        core_config.merge_cfg_from_list(overrides)
        # Postconditions: every override was applied with the right type.
        assert type(cfg.TRAIN.SCALES) is tuple
        assert len(cfg.TRAIN.SCALES) == 1
        assert cfg.TRAIN.SCALES[0] == 100
        assert cfg.MODEL.TYPE == 'foobar'
        assert cfg.NUM_GPUS == 2

    def test_deprecated_key_from_list(self):
        """Deprecated keys in an override list are ignored, not added."""
        # You should see logger messages like:
        #   "Deprecated config key (ignoring): MODEL.DILATION"
        overrides = ['FINAL_MSG', 'foobar', 'MODEL.DILATION', 2]
        with self.assertRaises(AttributeError):
            _ = cfg.FINAL_MSG  # noqa
        with self.assertRaises(AttributeError):
            _ = cfg.MODEL.DILATION  # noqa
        core_config.merge_cfg_from_list(overrides)
        # The keys must still be absent after the merge.
        with self.assertRaises(AttributeError):
            _ = cfg.FINAL_MSG  # noqa
        with self.assertRaises(AttributeError):
            _ = cfg.MODEL.DILATION  # noqa

    def test_deprecated_key_from_file(self):
        """Deprecated keys in a config file are ignored, not added."""
        # You should see logger messages like:
        #   "Deprecated config key (ignoring): MODEL.DILATION"
        with tempfile.NamedTemporaryFile() as dump_file:
            with_deprecated = copy.deepcopy(cfg)
            with_deprecated.MODEL.DILATION = 2
            envu.yaml_dump(with_deprecated, dump_file)
            with self.assertRaises(AttributeError):
                _ = cfg.MODEL.DILATION  # noqa
            core_config.merge_cfg_from_file(dump_file.name)
            with self.assertRaises(AttributeError):
                _ = cfg.MODEL.DILATION  # noqa

    def test_renamed_key_from_list(self):
        """A renamed key in an override list raises KeyError."""
        # You should see logger messages like:
        #  "Key EXAMPLE.RENAMED.KEY was renamed to EXAMPLE.KEY;
        #  please update your config"
        overrides = ['EXAMPLE.RENAMED.KEY', 'foobar']
        with self.assertRaises(AttributeError):
            _ = cfg.EXAMPLE.RENAMED.KEY  # noqa
        with self.assertRaises(KeyError):
            core_config.merge_cfg_from_list(overrides)

    def test_renamed_key_from_file(self):
        """A renamed key in a config file raises KeyError."""
        # You should see logger messages like:
        #  "Key EXAMPLE.RENAMED.KEY was renamed to EXAMPLE.KEY;
        #  please update your config"
        with tempfile.NamedTemporaryFile() as dump_file:
            with_renamed = copy.deepcopy(cfg)
            with_renamed.EXAMPLE = AttrDict()
            with_renamed.EXAMPLE.RENAMED = AttrDict()
            with_renamed.EXAMPLE.RENAMED.KEY = 'foobar'
            envu.yaml_dump(with_renamed, dump_file)
            with self.assertRaises(AttributeError):
                _ = cfg.EXAMPLE.RENAMED.KEY  # noqa
            with self.assertRaises(KeyError):
                core_config.merge_cfg_from_file(dump_file.name)


if __name__ == '__main__':
    # Configure logging first; the tests above expect logger messages about
    # deprecated and renamed config keys to be visible.
    logging_utils.setup_logging(__name__)
    unittest.main()


================================================
FILE: detectron/tests/test_loader.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import logging
import unittest
import unittest.mock as mock

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import muji
from caffe2.python import workspace

from detectron.core.config import assert_and_infer_cfg
from detectron.core.config import cfg
from detectron.roi_data.loader import RoIDataLoader
import detectron.utils.logging as logging_utils


def get_roidb_blobs(roidb):
    """Stand-in minibatch builder used to mock ``get_minibatch``.

    Stacks the ``'data'`` field of every roidb entry along a new leading
    axis.

    Returns:
        A ``(blobs, True)`` tuple where ``blobs`` is a dict with the single
        key ``'data'``.
    """
    per_entry_data = [entry['data'] for entry in roidb]
    return {'data': np.stack(per_entry_data)}, True


def get_net(data_loader, name):
    """Build a net named ``name`` that dequeues the loader's blobs per GPU.

    For each of the ``cfg.NUM_GPUS`` devices the loader's output blobs are
    created in the workspace under the ``gpu_<id>`` name scope and one
    ``DequeueBlobs`` op reading from the loader's blobs queue is added.

    Returns:
        The constructed ``core.Net``.
    """
    output_names = data_loader.get_output_names()
    dequeue_net = core.Net(name)
    dequeue_net.type = 'dag'
    for gpu_id in range(cfg.NUM_GPUS):
        scope_name = 'gpu_{}'.format(gpu_id)
        with core.NameScope(scope_name), \
                core.DeviceScope(muji.OnGPU(gpu_id)):
            # The destination blobs must exist before they are dequeued into.
            for output_name in output_names:
                workspace.CreateBlob(core.ScopedName(output_name))
            dequeue_net.DequeueBlobs(
                data_loader._blobs_queue_name, output_names)
    logging.getLogger(__name__).info(
        "Protobuf:\n" + str(dequeue_net.Proto()))

    return dequeue_net


def get_roidb_sample_data(sample_data):
    """Create a fake roidb whose entries all carry ``sample_data``.

    The number of entries is drawn with ``np.random.randint(4, 10)``; every
    entry is its own dict of the form ``{'data': sample_data}``.
    """
    num_entries = np.random.randint(4, 10)
    return [{'data': sample_data} for _ in range(num_entries)]


def create_loader_and_network(sample_data, name):
    """Create and start a loader over fake data plus its dequeue net.

    Args:
        sample_data: array stored in every entry of the generated roidb.
        name: name given to the dequeue net built for this loader.

    Returns:
        ``(loader, net)``; the loader has already been started without
        prefilling and the caller is responsible for shutting it down.
    """
    roidb = get_roidb_sample_data(sample_data)
    loader = RoIDataLoader(roidb)
    # Use the caller-supplied name; it was previously ignored in favor of a
    # hard-coded 'dequeue_net_train', so every net ended up with that name.
    net = get_net(loader, name)
    loader.register_sigint_handler()
    loader.start(prefill=False)
    return loader, net


def run_net(net):
    """Run ``net`` once and return the 'data' blob scoped under ``gpu_0``."""
    workspace.RunNetOnce(net)
    first_gpu = core.DeviceOption(caffe2_pb2.CUDA, 0)
    scope_name = 'gpu_{}'.format(0)
    # Resolve and fetch the blob name inside the first GPU's scopes.
    with core.NameScope(scope_name), core.DeviceScope(first_gpu):
        return workspace.FetchBlob(core.ScopedName('data'))


class TestRoIDataLoader(unittest.TestCase):
    """Checks that two RoIDataLoader instances can run side by side."""

    @mock.patch(
        'detectron.roi_data.loader.get_minibatch_blob_names',
        return_value=[u'data']
    )
    @mock.patch(
        'detectron.roi_data.loader.get_minibatch',
        side_effect=get_roidb_blobs
    )
    def test_two_parallel_loaders(self, _get_minibatch, _get_blob_names):
        """Each dequeue net must yield the data of its own loader.

        Minibatch construction is mocked so that every minibatch consists of
        the fake roidb's sample data stacked together.
        """
        train_data = np.random.rand(2, 3, 3).astype(np.float32)
        train_loader, train_net = create_loader_and_network(
            train_data, 'dequeue_net_train')
        test_data = np.random.rand(2, 4, 4).astype(np.float32)
        test_loader, test_net = create_loader_and_network(
            test_data, 'dequeue_net_test')

        # Alternate between the two nets: train first, then test.
        expectations = ((train_net, train_data), (test_net, test_data))
        for _ in range(5):
            for net, expected in expectations:
                fetched = run_net(net)
                self.assertEqual(fetched[0].tolist(), expected.tolist())

        test_loader.shutdown()
        train_loader.shutdown()


if __name__ == '__main__':
    # Initialize Caffe2 before any net or blob is created.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    logger = logging_utils.setup_logging(__name__)
    logger.setLevel(logging.DEBUG)
    # Keep the loader's own logger at INFO while this script logs at DEBUG.
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    np.random.seed(cfg.RNG_SEED)
    # Test configuration: aspect grouping off and two GPUs; get_net builds
    # one dequeue op per GPU from cfg.NUM_GPUS.
    cfg.TRAIN.ASPECT_GROUPING = False
    cfg.NUM_GPUS = 2
    assert_and_infer_cfg()
    unittest.main()


================================================
FILE: detectron/tests/test_restore_checkpoint.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np
import os
import shutil
import tempfile

from caffe2.python import workspace

from detectron.core.config import assert_and_infer_cfg
from detectron.core.config import cfg
from detectron.core.config import get_output_dir
from detectron.datasets.roidb import combined_roidb_for_training
from detectron.modeling import model_builder
from detectron.utils.logging import setup_logging
import detectron.utils.c2 as c2_utils
import detectron.utils.net as nu

# Runs at import time, before any model is built below. Presumably this loads
# Detectron's custom Caffe2 operators - confirm against detectron.utils.c2.
c2_utils.import_detectron_ops()


def get_params(model):
    """Snapshot the model's parameters and momentum blobs.

    Every blob in ``model.params`` is fetched, as well as the
    ``<param>_momentum`` blob of every trainable parameter.

    Returns:
        ``(gpu_0_blobs, all_blobs)`` where ``gpu_0_blobs`` maps unscoped
        names to the values of blobs whose scoped name contains 'gpu_0',
        and ``all_blobs`` maps every scoped name to its value.
    """
    gpu_0_blobs = {}
    all_blobs = {}

    def _snapshot(scoped_name):
        # Record one blob under its scoped name and, for gpu_0 blobs, also
        # under its unscoped name.
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        if 'gpu_0' in scoped_name:
            gpu_0_blobs[unscoped_name] = workspace.FetchBlob(scoped_name)
        all_blobs[scoped_name] = workspace.FetchBlob(scoped_name)

    # Parameters first ...
    for param in model.params:
        _snapshot(str(param))
    # ... then the momentum blob of each trainable parameter.
    for param in model.TrainableParams():
        _snapshot(str(param) + '_momentum')
    return gpu_0_blobs, all_blobs


def add_momentum_init_ops(model):
    """Add ops to the init net that fill GPU 0 momentum blobs with noise.

    For every trainable parameter on GPU 0 a ``GaussianFill`` op (mean 0,
    std 1) is appended to ``model.param_init_net`` for the blob named
    ``<param>_momentum``.
    """
    for param in model.TrainableParams(gpu_id=0):
        momentum_blob = param + '_momentum'
        model.param_init_net.GaussianFill(
            [momentum_blob], momentum_blob, mean=0.0, std=1.0)


def init_weights(model):
    """Run the model's parameter init net once, then broadcast parameters."""
    init_net = model.param_init_net
    # Weights are initialized on gpu_id = 0 and then copied out by the
    # broadcast below.
    workspace.RunNetOnce(init_net)
    nu.broadcast_parameters(model)


def test_restore_checkpoint():
    """Save a model's weights, perturb them, restore them and compare.

    The restored parameters must equal the saved ones blob for blob, and
    after broadcasting every blob must equal its gpu_0 counterpart.
    """
    # Build the training model with random weights and momentum
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)

    # Attach the training inputs and instantiate the net
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)

    # Location of the checkpoint for iteration 0
    iter_num = 0
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(
        output_dir, 'model_iter{}.pkl'.format(iter_num))

    # Write the weights to disk and remember what was saved
    nu.save_model_to_weights_file(chk_file_path, model)
    orig_gpu_0_params, orig_all_params = get_params(model)

    # Overwrite the weights by re-running the initializers
    init_weights(model)

    # Load the checkpoint back into gpu 0 and broadcast it
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all_params = get_params(model)

    # Every saved blob must have been restored exactly
    for scoped_name, saved_blob in orig_all_params.items():
        np.testing.assert_array_equal(
            saved_blob, restored_all_params[scoped_name])

    # After the broadcast every blob must match its gpu_0 counterpart
    for scoped_name, restored_blob in restored_all_params.items():
        gpu_0_name = c2_utils.UnscopeName(scoped_name)
        np.testing.assert_array_equal(
            restored_blob, orig_gpu_0_params[gpu_0_name])


if __name__ == '__main__':
    # Initialize Caffe2 and logging before any model is built.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    logger = setup_logging(__name__)
    logger.setLevel(logging.DEBUG)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    np.random.seed(cfg.RNG_SEED)
    # Checkpoints are written below this temporary directory, which
    # test_restore_checkpoint removes again through cfg.OUTPUT_DIR.
    output_dir = tempfile.mkdtemp()
    # Generate config for test
    cfg.MODEL.TYPE = 'generalized_rcnn'
    cfg.MODEL.CONV_BODY = 'FPN.add_fpn_ResNet50_conv5_body'
    cfg.MODEL.NUM_CLASSES = 81
    cfg.MODEL.FASTER_RCNN = True
    cfg.FPN.FPN_ON = True
    cfg.FPN.MULTILEVEL_ROIS = True
    cfg.FPN.MULTILEVEL_RPN = True
    cfg.FAST_RCNN.ROI_BOX_HEAD = 'fast_rcnn_heads.add_roi_2mlp_head'
    cfg.FAST_RCNN.ROI_XFORM_METHOD = 'RoIAlign'
    cfg.OUTPUT_DIR = output_dir
    cfg.TRAIN.DATASETS = ('coco_2014_minival',)
    cfg.TRAIN.WEIGHTS = b''
    # Repeat the test with 1, 2, ..., N GPUs, where N is the number of CUDA
    # devices reported by Caffe2.
    for num_gpu in range(workspace.NumCudaDevices()):
        # The cfg is unlocked before each change; presumably
        # assert_and_infer_cfg() makes it immutable again - TODO confirm.
        cfg.immutable(False)
        cfg.NUM_GPUS = num_gpu + 1
        assert_and_infer_cfg()
        test_restore_checkpoint()


================================================
FILE: detectron/tests/test_smooth_l1_loss_op.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import unittest

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import gradient_checker
from caffe2.python import workspace

import detectron.utils.c2 as c2_utils
import detectron.utils.logging as logging_utils


class SmoothL1LossTest(unittest.TestCase):
    """Gradient check for the 'SmoothL1Loss' operator on CUDA device 0."""

    def test_forward_and_gradient(self):
        """Run the op on random inputs and compare the gradient that
        GradientChecker returns against its numerical estimate.
        """
        blob_shape = (128, 4 * 21)
        Y = np.random.randn(*blob_shape).astype(np.float32)
        Y_hat = np.random.randn(*blob_shape).astype(np.float32)
        # Weights are random but clamped so that none of them is negative
        inside_weights = np.random.randn(*blob_shape).astype(np.float32)
        inside_weights[inside_weights < 0] = 0
        outside_weights = np.random.randn(*blob_shape).astype(np.float32)
        outside_weights[outside_weights < 0] = 0
        scale = np.random.random()
        beta = np.random.random()

        input_names = ['Y_hat', 'Y', 'inside_weights', 'outside_weights']
        op = core.CreateOperator(
            'SmoothL1Loss', input_names, ['loss'], scale=scale, beta=beta
        )

        cuda_device = core.DeviceOption(caffe2_pb2.CUDA, 0)
        gc = gradient_checker.GradientChecker(
            stepsize=0.005, threshold=0.005, device_option=cuda_device
        )

        # NOTE(review): the trailing `0, [0]` presumably select the first
        # input (Y_hat) and the first output (loss) -- confirm against the
        # GradientChecker.CheckSimple signature.
        input_values = [Y_hat, Y, inside_weights, outside_weights]
        res, grad, grad_estimated = gc.CheckSimple(op, input_values, 0, [0])

        shapes_match = grad.shape == grad_estimated.shape
        self.assertTrue(
            shapes_match, 'Fail check: grad.shape != grad_estimated.shape'
        )

        # To inspect the gradient and estimated gradient:
        # np.set_printoptions(precision=3, suppress=True)
        # print('grad:')
        # print(grad)
        # print('grad_estimated:')
        # print(grad_estimated)

        self.assertTrue(res)


if __name__ == '__main__':
    # Load the Detectron operator library and make sure the operator under
    # test is registered before handing control to unittest.
    c2_utils.import_detectron_ops()
    assert 'SmoothL1Loss' in workspace.RegisteredOperators()
    logging_utils.setup_logging(__name__)
    unittest.main()


================================================
FILE: detectron/tests/test_spatial_narrow_as_op.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import unittest

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import gradient_checker
from caffe2.python import workspace

import detectron.utils.c2 as c2_utils
import detectron.utils.logging as logging_utils


class SpatialNarrowAsOpTest(unittest.TestCase):
    """Forward, gradient and error-path tests for the 'SpatialNarrowAs' op."""

    def _run_test(self, A, B, check_grad=False):
        """Run SpatialNarrowAs(A, B) -> C under a CUDA device scope and check
        that C equals the leading corner of A cropped to C's own shape.
        Optionally run a gradient check first.
        """
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C'])
            workspace.FeedBlob('A', A)
            workspace.FeedBlob('B', B)
        workspace.RunOperatorOnce(op)
        C = workspace.FetchBlob('C')

        if check_grad:
            checker = gradient_checker.GradientChecker(
                stepsize=0.005,
                threshold=0.005,
                device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
            )
            passed, _grad, _grad_estimated = checker.CheckSimple(
                op, [A, B], 0, [0]
            )
            self.assertTrue(passed, 'Grad check failed')

        # Reference result: crop A to the shape the op produced
        dim0, dim1, dim2, dim3 = C.shape
        expected = A[:dim0, :dim1, :dim2, :dim3]
        np.testing.assert_allclose(C, expected, rtol=1e-5, atol=1e-08)

    def test_small_forward_and_gradient(self):
        """Small inputs, with gradient checking, for a 4-D and a 3-D B."""
        for b_shape in ((2, 3, 2, 2), (2, 3, 5)):
            A = np.random.randn(2, 3, 5, 7).astype(np.float32)
            B = np.random.randn(*b_shape).astype(np.float32)
            self._run_test(A, B, check_grad=True)

    def test_large_forward(self):
        """Forward-only check on larger inputs."""
        for a_shape in ((2, 256, 42, 100), (2, 256, 42, 87)):
            A = np.random.randn(*a_shape).astype(np.float32)
            B = np.random.randn(2, 256, 35, 87).astype(np.float32)
            self._run_test(A, B)

    def test_size_exceptions(self):
        """Shape combinations that are expected to raise RuntimeError."""
        for a_shape in ((2, 256, 42, 86), (2, 255, 42, 88)):
            A = np.random.randn(*a_shape).astype(np.float32)
            B = np.random.randn(2, 256, 35, 87).astype(np.float32)
            with self.assertRaises(RuntimeError):
                self._run_test(A, B)


if __name__ == '__main__':
    # Initialize Caffe2 with the given log-level flag, load the Detectron
    # operator library, and verify the operator under test is registered
    # before running the tests.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    c2_utils.import_detectron_ops()
    assert 'SpatialNarrowAs' in workspace.RegisteredOperators()
    logging_utils.setup_logging(__name__)
    unittest.main()


================================================
FILE: detectron/tests/test_zero_even_op.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import unittest

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import workspace

import detectron.utils.c2 as c2_utils


class ZeroEvenOpTest(unittest.TestCase):
    """Tests for the custom 'ZeroEven' operator, run once without a device
    scope and once under a CUDA device scope (the `test_gpu_*` methods).
    """

    def _run_zero_even_op(self, X):
        """Feed X, run ZeroEven with no explicit device scope, return Y."""
        op = core.CreateOperator('ZeroEven', ['X'], ['Y'])
        workspace.FeedBlob('X', X)
        workspace.RunOperatorOnce(op)
        Y = workspace.FetchBlob('Y')
        return Y

    def _run_zero_even_op_gpu(self, X):
        """Feed X and create the op under a CUDA device 0 scope, return Y."""
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            op = core.CreateOperator('ZeroEven', ['X'], ['Y'])
            workspace.FeedBlob('X', X)
        workspace.RunOperatorOnce(op)
        Y = workspace.FetchBlob('Y')
        return Y

    def test_throws_on_non_1D_arrays(self):
        X = np.zeros((2, 2), dtype=np.float32)
        # Raw string: the backslashes are regex escapes, not string escapes
        with self.assertRaisesRegex(RuntimeError, r'X\.ndim\(\) == 1'):
            self._run_zero_even_op(X)

    def test_handles_empty_arrays(self):
        X = np.array([], dtype=np.float32)
        Y_exp = np.copy(X)
        Y_act = self._run_zero_even_op(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_sets_vals_at_even_inds_to_zero(self):
        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)
        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)
        Y_act = self._run_zero_even_op(X)
        # Only the even-indexed entries are compared here
        np.testing.assert_allclose(Y_act[0::2], Y_exp[0::2])

    def test_preserves_vals_at_odd_inds(self):
        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)
        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)
        Y_act = self._run_zero_even_op(X)
        # Only the odd-indexed entries are compared here
        np.testing.assert_allclose(Y_act[1::2], Y_exp[1::2])

    def test_handles_even_length_arrays(self):
        X = np.random.rand(64).astype(np.float32)
        Y_exp = np.copy(X)
        Y_exp[0::2] = 0.0
        Y_act = self._run_zero_even_op(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_handles_odd_length_arrays(self):
        X = np.random.randn(77).astype(np.float32)
        Y_exp = np.copy(X)
        Y_exp[0::2] = 0.0
        Y_act = self._run_zero_even_op(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_gpu_throws_on_non_1D_arrays(self):
        X = np.zeros((2, 2), dtype=np.float32)
        # Raw string: the backslashes are regex escapes, not string escapes
        with self.assertRaisesRegex(RuntimeError, r'X\.ndim\(\) == 1'):
            self._run_zero_even_op_gpu(X)

    def test_gpu_handles_empty_arrays(self):
        X = np.array([], dtype=np.float32)
        Y_exp = np.copy(X)
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_gpu_sets_vals_at_even_inds_to_zero(self):
        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)
        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act[0::2], Y_exp[0::2])

    def test_gpu_preserves_vals_at_odd_inds(self):
        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)
        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act[1::2], Y_exp[1::2])

    def test_gpu_handles_even_length_arrays(self):
        X = np.random.rand(64).astype(np.float32)
        Y_exp = np.copy(X)
        Y_exp[0::2] = 0.0
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_gpu_handles_odd_length_arrays(self):
        X = np.random.randn(77).astype(np.float32)
        Y_exp = np.copy(X)
        Y_exp[0::2] = 0.0
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act, Y_exp)


if __name__ == '__main__':
    # Initialize Caffe2 with the given log-level flag, load the custom ops
    # library (note: import_custom_ops, not import_detectron_ops), and verify
    # that 'ZeroEven' is registered before running the tests.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    c2_utils.import_custom_ops()
    assert 'ZeroEven' in workspace.RegisteredOperators()
    unittest.main()


================================================
FILE: detectron/utils/__init__.py
================================================


================================================
FILE: detectron/utils/blob.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Caffe2 blob helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np
from six.moves import cPickle as pickle

from caffe2.proto import caffe2_pb2

from detectron.core.config import cfg


def get_image_blob(im, target_scale, target_max_size):
    """Convert a single image into a network input blob.

    Arguments:
        im (ndarray): a color image in BGR order
        target_scale: target size forwarded to prep_im_for_blob
        target_max_size: maximum size forwarded to prep_im_for_blob

    Returns:
        blob (ndarray): the data blob built by im_list_to_blob
        im_scale (float): image scale (target size) / (original size)
        im_info (ndarray): float32 array [[height, width, im_scale]], where
            height and width are read from the blob
    """
    scaled_im, im_scale = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, target_scale, target_max_size
    )
    blob = im_list_to_blob(scaled_im)
    # NOTE: this height and width may be larger than actual scaled input image
    # due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are
    # maintaining this behavior for now to make existing results exactly
    # reproducible (in practice using the true input image height and width
    # yields nearly the same results, but they are sometimes slightly different
    # because predictions near the edge of the image will be pruned more
    # aggressively).
    blob_height = blob.shape[2]
    blob_width = blob.shape[3]
    info_row = np.hstack((blob_height, blob_width, im_scale))
    im_info = info_row[np.newaxis, :].astype(np.float32)
    return blob, im_scale, im_info


def im_list_to_blob(ims):
    """Pack one image or a list of images into a single network input blob.

    Assumes images were prepared using prep_im_for_blob or equivalent: i.e.
      - BGR channel order
      - pixel means subtracted
      - resized to the desired input size
      - float32 numpy ndarray format

    Every image is copied into the top-left corner of a zero-filled canvas
    sized to the largest image (rounded up to a multiple of
    cfg.FPN.COARSEST_STRIDE when cfg.FPN.FPN_ON is set). The result is a 4D
    float32 NCHW tensor: (num_images, 3, height, width).
    """
    if not isinstance(ims, list):
        ims = [ims]
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    # Pad the image so they can be divisible by a stride
    if cfg.FPN.FPN_ON:
        stride = float(cfg.FPN.COARSEST_STRIDE)
        for axis in (0, 1):
            max_shape[axis] = int(np.ceil(max_shape[axis] / stride) * stride)

    canvas_shape = (len(ims), max_shape[0], max_shape[1], 3)
    blob = np.zeros(canvas_shape, dtype=np.float32)
    for idx, im in enumerate(ims):
        blob[idx, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    return blob.transpose((0, 3, 1, 2))


def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Prepare an image for use as a network input blob. Specifically:
      - Convert to float32
      - Subtract per-channel pixel mean
      - Rescale so the shorter side equals target_size, unless that would make
        the longer side exceed max_size, in which case the longer side is
        scaled to max_size instead

    The input array is never modified; the work is done on a float32 copy.

    Returns:
        im (ndarray): the transformed image
        im_scale (float): the scale factor that was applied to both axes
    """
    # Always copy: with copy=False a float32 input would be returned as-is and
    # the in-place mean subtraction below would silently modify the caller's
    # image.
    im = im.astype(np.float32, copy=True)
    im -= pixel_means
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(
        im,
        None,
        None,
        fx=im_scale,
        fy=im_scale,
        interpolation=cv2.INTER_LINEAR
    )
    return im, im_scale


def zeros(shape, int32=False):
    """Build an all-zeros blob of the given shape. The dtype is int32 when
    `int32` is truthy and float32 otherwise.
    """
    if int32:
        blob_dtype = np.int32
    else:
        blob_dtype = np.float32
    return np.zeros(shape, dtype=blob_dtype)


def ones(shape, int32=False):
    """Build an all-ones blob of the given shape. The dtype is int32 when
    `int32` is truthy and float32 otherwise.
    """
    if int32:
        blob_dtype = np.int32
    else:
        blob_dtype = np.float32
    return np.ones(shape, dtype=blob_dtype)


def py_op_copy_blob(blob_in, blob_out):
    """Copy a numpy ndarray given as blob_in into the Caffe2 CPUTensor blob
    given as blob_out. Supports float32 and int32 blob data types. This function
    is intended for copying numpy data into a Caffe2 blob in PythonOps.

    int32 inputs (re)initialize blob_out as an INT32 tensor of the input's
    shape; every other input only reshapes blob_out. The data is then copied
    into blob_out.data in place.
    """
    # Some awkward voodoo required by Caffe2 to support int32 blobs.
    # The previous implementation probed an undefined name inside a
    # try/except Exception, so the probe always failed and the decision was
    # always made from the input dtype alone. That effective behavior is kept
    # here, without the swallowed NameError.
    if blob_in.dtype == np.int32:
        # init can only take a list (failed on tuple)
        blob_out.init(list(blob_in.shape), caffe2_pb2.TensorProto.INT32)
    else:
        blob_out.reshape(blob_in.shape)
    blob_out.data[...] = blob_in


def get_loss_gradients(model, loss_blobs):
    """Add a ConstantFill op with value 1.0 for every blob in 'loss_blobs' and
    return a dict mapping str(loss blob) -> str(gradient blob). Each gradient
    output is named after its loss blob with a '_grad' suffix.
    """
    grads_by_loss = {}
    for loss in loss_blobs:
        grad_name = loss + '_grad'
        grad_blob = model.net.ConstantFill(loss, [grad_name], value=1.0)
        grads_by_loss[str(loss)] = str(grad_blob)
    return grads_by_loss


def serialize(obj):
    """Serialize a Python object using pickle and encode it as an array of
    float32 values so that it can be fed into the workspace. See deserialize().

    Each pickle byte becomes one float32 element, so the returned 1-D array
    has as many elements as the pickle has bytes.
    """
    # np.frombuffer replaces the deprecated binary mode of np.fromstring. It
    # returns a read-only view of the bytes; astype() then produces a fresh,
    # writable float32 array.
    return np.frombuffer(pickle.dumps(obj), dtype=np.uint8).astype(np.float32)


def deserialize(arr):
    """Rebuild a Python object from an array of float32 values fetched from a
    workspace: the values are cast to uint8, joined into a byte string and
    unpickled. See serialize().
    """
    pickled_bytes = arr.astype(np.uint8).tobytes()
    return pickle.loads(pickled_bytes)


================================================
FILE: detectron/utils/boxes.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Box manipulation functions. The internal Detectron box format is
[x1, y1, x2, y2] where (x1, y1) specify the top-left box corner and (x2, y2)
specify the bottom-right box corner. Boxes from external sources, e.g.,
datasets, may be in other formats (such as [x, y, w, h]) and require conversion.

This module uses a convention that may seem strange at first: the width of a box
is computed as x2 - x1 + 1 (likewise for height). The "+ 1" dates back to old
object detection days when the coordinates were integer pixel indices, rather
than floating point coordinates in a subpixel coordinate frame. A box with x2 =
x1 and y2 = y1 was taken to include a single pixel, having a width of 1, and
hence requiring the "+ 1". Now, most datasets will likely provide boxes with
floating point coordinates and the width should be more reasonably computed as
x2 - x1.

In practice, as long as a model is trained and tested with a consistent
convention either decision seems to be ok (at least in our experience on COCO).
Since we have a long history of training models with the "+ 1" convention, we
are reluctant to change it even if our modern tastes prefer not to use it.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from detectron.core.config import cfg
import detectron.utils.cython_bbox as cython_bbox
import detectron.utils.cython_nms as cython_nms

# Expose the Cython bbox_overlaps routine as a function of this module.
bbox_overlaps = cython_bbox.bbox_overlaps


def boxes_area(boxes):
    """Return the area of each [x1, y1, x2, y2] row of `boxes`, with width and
    height computed using the "+ 1" convention.
    """
    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    assert np.all(areas >= 0), 'Negative areas founds'
    return areas


def unique_boxes(boxes, scale=1.0):
    """Return the sorted indices of the first occurrence of each unique box.

    Boxes are scaled, rounded and hashed by a dot product with
    [1, 1e3, 1e6, 1e9]; boxes with equal hashes count as duplicates.
    """
    place_values = np.array([1, 1e3, 1e6, 1e9])
    rounded = np.round(boxes * scale)
    hashes = rounded.dot(place_values)
    first_seen = np.unique(hashes, return_index=True)[1]
    return np.sort(first_seen)


def xywh_to_xyxy(xywh):
    """Convert [x1 y1 w h] box format to [x1 y1 x2 y2] format.

    Accepts a single box as a 4-element list/tuple (returns a tuple) or
    multiple boxes as a 2D ndarray (returns an ndarray). Raises TypeError for
    any other argument type.
    """
    if isinstance(xywh, (list, tuple)):
        # Single box given as a list of coordinates
        assert len(xywh) == 4
        x1, y1, box_w, box_h = xywh[0], xywh[1], xywh[2], xywh[3]
        x2 = x1 + np.maximum(0., box_w - 1.)
        y2 = y1 + np.maximum(0., box_h - 1.)
        return (x1, y1, x2, y2)

    if isinstance(xywh, np.ndarray):
        # Multiple boxes given as a 2D ndarray
        top_left = xywh[:, 0:2]
        bottom_right = xywh[:, 0:2] + np.maximum(0, xywh[:, 2:4] - 1)
        return np.hstack((top_left, bottom_right))

    raise TypeError('Argument xywh must be a list, tuple, or numpy array.')


def xyxy_to_xywh(xyxy):
    """Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format.

    Accepts a single box as a 4-element list/tuple (returns a tuple) or
    multiple boxes as a 2D ndarray (returns an ndarray). Raises TypeError for
    any other argument type.
    """
    if isinstance(xyxy, (list, tuple)):
        # Single box given as a list of coordinates
        assert len(xyxy) == 4
        x1, y1, x2, y2 = xyxy[0], xyxy[1], xyxy[2], xyxy[3]
        return (x1, y1, x2 - x1 + 1, y2 - y1 + 1)

    if isinstance(xyxy, np.ndarray):
        # Multiple boxes given as a 2D ndarray
        top_left = xyxy[:, 0:2]
        sizes = xyxy[:, 2:4] - xyxy[:, 0:2] + 1
        return np.hstack((top_left, sizes))

    raise TypeError('Argument xyxy must be a list, tuple, or numpy array.')


def filter_small_boxes(boxes, min_size):
    """Return the indices of boxes whose width and height (both computed with
    the "+ 1" convention) are strictly greater than min_size.
    """
    widths = boxes[:, 2] - boxes[:, 0] + 1
    heights = boxes[:, 3] - boxes[:, 1] + 1
    big_enough = (widths > min_size) & (heights > min_size)
    return np.where(big_enough)[0]


def clip_boxes_to_image(boxes, height, width):
    """Clamp x coordinates to [0, width - 1] and y coordinates to
    [0, height - 1]. `boxes` is modified in place and also returned.
    """
    x_cols, y_cols = [0, 2], [1, 3]
    max_x = width - 1.
    max_y = height - 1.
    boxes[:, x_cols] = np.minimum(max_x, np.maximum(0., boxes[:, x_cols]))
    boxes[:, y_cols] = np.minimum(max_y, np.maximum(0., boxes[:, y_cols]))
    return boxes


def clip_xyxy_to_image(x1, y1, x2, y2, height, width):
    """Clamp x1/x2 to [0, width - 1] and y1/y2 to [0, height - 1] and return
    the clipped (x1, y1, x2, y2).
    """
    def _clamp(values, upper):
        return np.minimum(upper, np.maximum(0., values))

    max_x = width - 1.
    max_y = height - 1.
    return (
        _clamp(x1, max_x), _clamp(y1, max_y),
        _clamp(x2, max_x), _clamp(y2, max_y)
    )


def clip_tiled_boxes(boxes, im_shape):
    """Clip boxes to image boundaries. im_shape is [height, width] and boxes
    has shape (N, 4 * num_tiled_boxes). `boxes` is modified in place and also
    returned."""
    assert boxes.shape[1] % 4 == 0, \
        'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(
        boxes.shape[1]
    )
    max_x = im_shape[1] - 1
    max_y = im_shape[0] - 1
    # Column offsets within each tiled box: 0 -> x1, 1 -> y1, 2 -> x2, 3 -> y2
    for offset, upper in enumerate((max_x, max_y, max_x, max_y)):
        cols = boxes[:, offset::4]
        boxes[:, offset::4] = np.maximum(np.minimum(cols, upper), 0)
    return boxes


def bbox_transform(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):
    """Forward transform: apply bounding-box regression deltas to proposal
    boxes to obtain predicted boxes. `deltas` holds (dx, dy, dw, dh) groups
    tiled along axis 1, and the result has the same shape and dtype as
    `deltas`. See bbox_transform_inv for a description of the weights
    argument.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    # Proposal sizes and centers ("+ 1" convention)
    src_w = boxes[:, 2] - boxes[:, 0] + 1.0
    src_h = boxes[:, 3] - boxes[:, 1] + 1.0
    src_cx = boxes[:, 0] + 0.5 * src_w
    src_cy = boxes[:, 1] + 0.5 * src_h

    # Column vectors so they broadcast against the tiled delta columns
    src_w_col = src_w[:, np.newaxis]
    src_h_col = src_h[:, np.newaxis]

    wx, wy, ww, wh = weights
    dx = deltas[:, 0::4] / wx
    dy = deltas[:, 1::4] / wy
    # Prevent sending too large values into np.exp()
    dw = np.minimum(deltas[:, 2::4] / ww, cfg.BBOX_XFORM_CLIP)
    dh = np.minimum(deltas[:, 3::4] / wh, cfg.BBOX_XFORM_CLIP)

    out_cx = dx * src_w_col + src_cx[:, np.newaxis]
    out_cy = dy * src_h_col + src_cy[:, np.newaxis]
    half_w = 0.5 * (np.exp(dw) * src_w_col)
    half_h = 0.5 * (np.exp(dh) * src_h_col)

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    # x1, y1
    pred_boxes[:, 0::4] = out_cx - half_w
    pred_boxes[:, 1::4] = out_cy - half_h
    # x2, y2 (note: "- 1" is correct; don't be fooled by the asymmetry)
    pred_boxes[:, 2::4] = out_cx + half_w - 1
    pred_boxes[:, 3::4] = out_cy + half_h - 1

    return pred_boxes


def bbox_transform_inv(boxes, gt_boxes, weights=(1.0, 1.0, 1.0, 1.0)):
    """Inverse transform that computes target bounding-box regression deltas
    given proposal boxes and ground-truth boxes. The weights argument should be
    a 4-tuple of multiplicative weights that are applied to the regression
    target. Returns an (N, 4) array of (dx, dy, dw, dh) rows.

    In older versions of this code (and in py-faster-rcnn), the weights were set
    such that the regression deltas would have unit standard deviation on the
    training dataset. Presently, rather than computing these statistics exactly,
    we use a fixed set of weights (10., 10., 5., 5.) by default. These are
    approximately the weights one would get from COCO using the previous unit
    stdev heuristic.
    """
    def _size_and_center(b):
        # Width, height and center of each box ("+ 1" convention)
        w = b[:, 2] - b[:, 0] + 1.0
        h = b[:, 3] - b[:, 1] + 1.0
        return w, h, b[:, 0] + 0.5 * w, b[:, 1] + 0.5 * h

    ex_w, ex_h, ex_cx, ex_cy = _size_and_center(boxes)
    gt_w, gt_h, gt_cx, gt_cy = _size_and_center(gt_boxes)

    wx, wy, ww, wh = weights
    target_rows = (
        wx * (gt_cx - ex_cx) / ex_w,
        wy * (gt_cy - ex_cy) / ex_h,
        ww * np.log(gt_w / ex_w),
        wh * np.log(gt_h / ex_h),
    )
    return np.vstack(target_rows).transpose()


def expand_boxes(boxes, scale):
    """Scale each box about its center by `scale` and return the result as a
    new array with the same shape as `boxes`.
    """
    center_x = (boxes[:, 2] + boxes[:, 0]) * .5
    center_y = (boxes[:, 3] + boxes[:, 1]) * .5
    half_w = (boxes[:, 2] - boxes[:, 0]) * .5
    half_h = (boxes[:, 3] - boxes[:, 1]) * .5

    half_w *= scale
    half_h *= scale

    expanded = np.zeros(boxes.shape)
    expanded[:, 0] = center_x - half_w
    expanded[:, 1] = center_y - half_h
    expanded[:, 2] = center_x + half_w
    expanded[:, 3] = center_y + half_h

    return expanded


def flip_boxes(boxes, im_width):
    """Mirror boxes horizontally within an image of width `im_width`. Works on
    tiled boxes (every 4th column) and returns a new array.
    """
    flipped = boxes.copy()
    old_x1 = boxes[:, 0::4]
    old_x2 = boxes[:, 2::4]
    # The old right edge becomes the new left edge, and vice versa
    flipped[:, 0::4] = im_width - old_x2 - 1
    flipped[:, 2::4] = im_width - old_x1 - 1
    return flipped


def aspect_ratio(boxes, aspect_ratio):
    """Perform width-relative aspect ratio transformation: multiply the x
    coordinates (columns 0 and 2 of every tiled box) by `aspect_ratio` and
    return the result as a new array.
    """
    scaled = boxes.copy()
    for x_offset in (0, 2):
        scaled[:, x_offset::4] = aspect_ratio * boxes[:, x_offset::4]
    return scaled


def box_voting(top_dets, all_dets, thresh, scoring_method='ID', beta=1.0):
    """Apply bounding-box voting to refine `top_dets` by voting with `all_dets`.
    See: https://arxiv.org/abs/1505.01749. Optional score averaging (not in the
    referenced  paper) can be applied by setting `scoring_method` appropriately.

    Returns a refined copy of `top_dets`; the inputs are not modified. Raises
    NotImplementedError for an unknown `scoring_method`.
    """
    # top_dets is [N, 5] each row is [x1 y1 x2 y2, score]
    # all_dets is [N, 5] each row is [x1 y1 x2 y2, score]
    voted_dets = top_dets.copy()
    candidate_boxes = all_dets[:, :4]
    candidate_scores = all_dets[:, 4]
    overlaps = bbox_overlaps(top_dets[:, :4], candidate_boxes)
    for k in range(voted_dets.shape[0]):
        voters = np.where(overlaps[k] >= thresh)[0]
        ws = candidate_scores[voters]
        # New box: score-weighted average of all sufficiently overlapping boxes
        voted_dets[k, :4] = np.average(
            candidate_boxes[voters, :], axis=0, weights=ws
        )
        if scoring_method == 'ID':
            # Identity, nothing to do
            continue

        if scoring_method == 'TEMP_AVG':
            # Average probabilities (considered as P(detected class) vs.
            # P(not the detected class)) after smoothing with a temperature
            # hyperparameter.
            P = np.vstack((ws, 1.0 - ws))
            X = np.log(P / np.max(P, axis=0))
            X_exp = np.exp(X / beta)
            P_temp = X_exp / np.sum(X_exp, axis=0)
            new_score = P_temp[0].mean()
        elif scoring_method == 'AVG':
            # Combine new probs from overlapping boxes
            new_score = ws.mean()
        elif scoring_method == 'IOU_AVG':
            # Average the scores, weighted by overlap with the top detection
            new_score = np.average(ws, weights=overlaps[k, voters])
        elif scoring_method == 'GENERALIZED_AVG':
            new_score = np.mean(ws**beta)**(1.0 / beta)
        elif scoring_method == 'QUASI_SUM':
            new_score = ws.sum() / float(len(ws))**beta
        else:
            raise NotImplementedError(
                'Unknown scoring method {}'.format(scoring_method)
            )
        voted_dets[k, 4] = new_score

    return voted_dets


def nms(dets, thresh):
    """Apply classic DPM-style greedy NMS via cython_nms.nms. Returns an empty
    list when `dets` has no rows.
    """
    if dets.shape[0] > 0:
        return cython_nms.nms(dets, thresh)
    return []


def soft_nms(
    dets, sigma=0.5, overlap_thresh=0.3, score_thresh=0.001, method='linear'
):
    """Apply the soft NMS algorithm from https://arxiv.org/abs/1704.04503.

    `method` must be one of 'hard', 'linear' or 'gaussian'. Returns the pair
    (dets, keep) produced by cython_nms.soft_nms, or (dets, []) unchanged when
    `dets` has no rows.
    """
    if dets.shape[0] == 0:
        return dets, []

    method_codes = {'hard': 0, 'linear': 1, 'gaussian': 2}
    assert method in method_codes, 'Unknown soft_nms method: {}'.format(method)

    # The Cython routine is called with a contiguous float32 array and
    # explicitly typed scalars.
    contiguous_dets = np.ascontiguousarray(dets, dtype=np.float32)
    dets, keep = cython_nms.soft_nms(
        contiguous_dets,
        np.float32(sigma),
        np.float32(overlap_thresh),
        np.float32(score_thresh),
        np.uint8(method_codes[method])
    )
    return dets, keep


================================================
FILE: detectron/utils/c2.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Helpful utilities for working with Caffe2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from six import string_types
import contextlib
import subprocess

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import dyndep
from caffe2.python import scope
from caffe2.python import workspace

import detectron.utils.env as envu


def import_contrib_ops():
    """Import contrib ops needed by Detectron.

    Currently this only delegates to detectron.utils.env.import_nccl_ops().
    """
    envu.import_nccl_ops()


def import_detectron_ops():
    """Load the Detectron ops library reported by
    detectron.utils.env.get_detectron_ops_lib() through dyndep.
    """
    dyndep.InitOpsLibrary(envu.get_detectron_ops_lib())


def import_custom_ops():
    """Load the custom ops library reported by
    detectron.utils.env.get_custom_ops_lib() through dyndep.
    """
    dyndep.InitOpsLibrary(envu.get_custom_ops_lib())


def SuffixNet(name, net, prefix_len, outputs):
    """Returns a new Net from the given Net (`net`) that includes only the ops
    after removing the first `prefix_len` number of ops. The new Net is thus a
    suffix of `net`. Blobs listed in `outputs` are registered as external output
    blobs.

    Args:
        name: name given to the cloned net (passed to `net.Clone`).
        net: the source Net; it is cloned, and its op list is only read.
        prefix_len: number of leading ops of `net` to leave out.
        outputs: a BlobReference or a list/tuple of BlobReferences; each one
            must already be defined in `net` (checked with an assert).

    Returns:
        A `(new_net, output_refs)` pair where `output_refs` holds one
        `new_net.GetBlobRef(...)` result per requested output, in order.
    """
    outputs = BlobReferenceList(outputs)
    for output in outputs:
        assert net.BlobIsDefined(output)
    new_net = net.Clone(name)

    # Start from an empty clone: drop all ops and external inputs/outputs
    del new_net.Proto().op[:]
    del new_net.Proto().external_input[:]
    del new_net.Proto().external_output[:]

    # Add suffix ops
    new_net.Proto().op.extend(net.Proto().op[prefix_len:])
    # Add external input blobs
    # Treat any undefined blobs as external inputs
    # NOTE(review): the list is built completely before external_input is
    # extended, so a blob consumed by several suffix ops appears to be listed
    # once per use (no de-duplication) -- confirm this is acceptable.
    input_names = [
        i for op in new_net.Proto().op for i in op.input
        if not new_net.BlobIsDefined(i)]
    new_net.Proto().external_input.extend(input_names)
    # Add external output blobs
    output_names = [str(o) for o in outputs]
    new_net.Proto().external_output.extend(output_names)
    return new_net, [new_net.GetBlobRef(o) for o in output_names]


def BlobReferenceList(blob_ref_or_list):
    """Normalize the argument to a sequence of BlobReferences.

    A single `core.BlobReference` is wrapped in a new one-element list. A
    `list` or `tuple` (exact types only) is returned unchanged after every
    element has been asserted to be a `core.BlobReference`. Anything else
    raises `TypeError`.
    """
    if isinstance(blob_ref_or_list, core.BlobReference):
        return [blob_ref_or_list]

    if type(blob_ref_or_list) not in (list, tuple):
        raise TypeError(
            'blob_ref_or_list must be a BlobReference or a list/tuple of '
            'BlobReferences'
        )

    for blob_ref in blob_ref_or_list:
        assert isinstance(blob_ref, core.BlobReference)
    return blob_ref_or_list


def UnscopeName(possibly_scoped_name):
    """Strip any name scope prefix from a (possibly) scoped name.

    Everything up to and including the last occurrence of
    `scope._NAMESCOPE_SEPARATOR` is dropped, e.g. 'gpu_0/foo' becomes 'foo'.
    A name without the separator is returned unchanged.
    """
    assert isinstance(possibly_scoped_name, string_types)
    # rfind() returns -1 when the separator is absent, so the slice below
    # starts at 0 and keeps the whole name in that case.
    last_separator = possibly_scoped_name.rfind(scope._NAMESCOPE_SEPARATOR)
    return possibly_scoped_name[last_separator + 1:]


@contextlib.contextmanager
def NamedCudaScope(gpu_id):
    """Enter a GPU name scope and a CUDA device scope for `gpu_id` together.

    Exists so callers need a single `with` statement instead of two nested
    ones. The name scope is entered first and exited last.
    """
    with GpuNameScope(gpu_id), CudaScope(gpu_id):
        yield


@contextlib.contextmanager
def GpuNameScope(gpu_id):
    """Enter a Caffe2 name scope named 'gpu_<gpu_id>' for the `with` body."""
    scope_name = 'gpu_{:d}'.format(gpu_id)
    with core.NameScope(scope_name):
        yield


@contextlib.contextmanager
def CudaScope(gpu_id):
    """Enter a Caffe2 device scope for CUDA device `gpu_id`.

    The device option is built with `CudaDevice(gpu_id)`.
    """
    with core.DeviceScope(CudaDevice(gpu_id)):
        yield


@contextlib.contextmanager
def CpuScope():
    """Enter a Caffe2 device scope that uses the CPU device type."""
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        yield


def CudaDevice(gpu_id):
    """Create a Cuda device.

    Returns the result of `core.DeviceOption` called with the
    `caffe2_pb2.CUDA` device type and `gpu_id`.
    """
    return core.DeviceOption(caffe2_pb2.CUDA, gpu_id)


def gauss_fill(std):
    """Build the pair ('GaussianFill', {'std': std}).

    Shorthand that saves callers from spelling out the tuple themselves.
    """
    fill_kwargs = {'std': std}
    return 'GaussianFill', fill_kwargs


def const_fill(value):
    """Build the pair ('ConstantFill', {'value': value}).

    Shorthand that saves callers from spelling out the tuple themselves.
    """
    fill_kwargs = {'value': value}
    return 'ConstantFill', fill_kwargs


def get_nvidia_info():
    """Return a 3-tuple: (nvidia-smi output, CUDA version, cuDNN version).

    The first item comes from `get_nvidia_smi_output()`; the versions are
    whatever `workspace.GetCUDAVersion()` and `workspace.GetCuDNNVersion()`
    report.
    """
    smi_output = get_nvidia_smi_output()
    cuda_version = workspace.GetCUDAVersion()
    cudnn_version = workspace.GetCuDNNVersion()
    return smi_output, cuda_version, cudnn_version


def get_nvidia_smi_output():
    """Run `nvidia-smi` and return its output as stripped text.

    stderr is folded into stdout. This is a best-effort helper: any failure
    (missing binary, non-zero exit status, decode error, ...) is turned into a
    descriptive message that is returned instead of being raised.
    """
    try:
        raw_output = subprocess.check_output(
            ["nvidia-smi"], stderr=subprocess.STDOUT
        )
        text = raw_output.decode("utf8")
    except Exception as err:
        text = "Executing nvidia-smi failed: " + str(err)
    return text.strip()


================================================
FILE: detectron/utils/collections.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""A simple attribute dictionary used for representing configuration options."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals


class AttrDict(dict):
    """A dict whose items can also be read and written as attributes.

    Each instance carries an immutability flag stored in its `__dict__` (not
    among the dict items). While the flag is set, attribute assignment raises
    `AttributeError`.
    """

    # Key under which the immutability flag lives in the instance __dict__
    IMMUTABLE = '__immutable__'

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__[AttrDict.IMMUTABLE] = False

    def __getattr__(self, name):
        """Resolve `name` from the instance __dict__ first, then the items."""
        instance_attrs = self.__dict__
        if name in instance_attrs:
            return instance_attrs[name]
        if name in self:
            return self[name]
        raise AttributeError(name)

    def __setattr__(self, name, value):
        """Assign `name`, unless this AttrDict is currently immutable.

        Names already present in the instance __dict__ are updated there;
        every other name is stored as a dict item.
        """
        instance_attrs = self.__dict__
        if instance_attrs[AttrDict.IMMUTABLE]:
            raise AttributeError(
                'Attempted to set "{}" to "{}", but AttrDict is immutable'.
                format(name, value)
            )
        if name in instance_attrs:
            instance_attrs[name] = value
        else:
            self[name] = value

    def immutable(self, is_immutable):
        """Set the immutability flag here and on every nested AttrDict.

        Nested AttrDicts are looked up both among the instance attributes and
        among the dict values, and the setting is applied to them recursively.
        """
        self.__dict__[AttrDict.IMMUTABLE] = is_immutable
        for children in (self.__dict__.values(), self.values()):
            for child in children:
                if isinstance(child, AttrDict):
                    child.immutable(is_immutable)

    def is_immutable(self):
        """Return the current value of the immutability flag."""
        return self.__dict__[AttrDict.IMMUTABLE]


================================================
FILE: detectron/utils/colormap.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""An awesome colormap for really neat visualizations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np


def colormap(rgb=False):
    """Return the fixed color table as a float32 array of shape (-1, 3).

    The table below is written as flat triples with components in [0, 1]; it
    is reshaped to one color per row and scaled by 255.

    Args:
        rgb: when True, rows keep the channel order used in the table; when
            False (the default), the channel axis is reversed.
    """
    palette = np.array(
        [
            0.000, 0.447, 0.741,
            0.850, 0.325, 0.098,
            0.929, 0.694, 0.125,
            0.494, 0.184, 0.556,
            0.466, 0.674, 0.188,
            0.301, 0.745, 0.933,
            0.635, 0.078, 0.184,
            0.300, 0.300, 0.300,
            0.600, 0.600, 0.600,
            1.000, 0.000, 0.000,
            1.000, 0.500, 0.000,
            0.749, 0.749, 0.000,
            0.000, 1.000, 0.000,
            0.000, 0.000, 1.000,
            0.667, 0.000, 1.000,
            0.333, 0.333, 0.000,
            0.333, 0.667, 0.000,
            0.333, 1.000, 0.000,
            0.667, 0.333, 0.000,
            0.667, 0.667, 0.000,
            0.667, 1.000, 0.000,
            1.000, 0.333, 0.000,
            1.000, 0.667, 0.000,
            1.000, 1.000, 0.000,
            0.000, 0.333, 0.500,
            0.000, 0.667, 0.500,
            0.000, 1.000, 0.500,
            0.333, 0.000, 0.500,
            0.333, 0.333, 0.500,
            0.333, 0.667, 0.500,
            0.333, 1.000, 0.500,
            0.667, 0.000, 0.500,
            0.667, 0.333, 0.500,
            0.667, 0.667, 0.500,
            0.667, 1.000, 0.500,
            1.000, 0.000, 0.500,
            1.000, 0.333, 0.500,
            1.000, 0.667, 0.500,
            1.000, 1.000, 0.500,
            0.000, 0.333, 1.000,
            0.000, 0.667, 1.000,
            0.000, 1.000, 1.000,
            0.333, 0.000, 1.000,
            0.333, 0.333, 1.000,
            0.333, 0.667, 1.000,
            0.333, 1.000, 1.000,
            0.667, 0.000, 1.000,
            0.667, 0.333, 1.000,
            0.667, 0.667, 1.000,
            0.667, 1.000, 1.000,
            1.000, 0.000, 1.000,
            1.000, 0.333, 1.000,
            1.000, 0.667, 1.000,
            0.167, 0.000, 0.000,
            0.333, 0.000, 0.000,
            0.500, 0.000, 0.000,
            0.667, 0.000, 0.000,
            0.833, 0.000, 0.000,
            1.000, 0.000, 0.000,
            0.000, 0.167, 0.000,
            0.000, 0.333, 0.000,
            0.000, 0.500, 0.000,
            0.000, 0.667, 0.000,
            0.000, 0.833, 0.000,
            0.000, 1.000, 0.000,
            0.000, 0.000, 0.167,
            0.000, 0.000, 0.333,
            0.000, 0.000, 0.500,
            0.000, 0.000, 0.667,
            0.000, 0.000, 0.833,
            0.000, 0.000, 1.000,
            0.000, 0.000, 0.000,
            0.143, 0.143, 0.143,
            0.286, 0.286, 0.286,
            0.429, 0.429, 0.429,
            0.571, 0.571, 0.571,
            0.714, 0.714, 0.714,
            0.857, 0.857, 0.857,
            1.000, 1.000, 1.000
        ]
    ).astype(np.float32)
    # One color per row, scaled from [0, 1] to [0, 255]
    palette = palette.reshape((-1, 3)) * 255
    if rgb:
        return palette
    # Reverse the channel axis for callers that did not ask for RGB order
    return palette[:, ::-1]


================================================
FILE: detectron/utils/coordinator.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Coordinated access to a shared multithreading/processing queue."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import contextlib
import logging
import threading
import traceback
from six.moves import queue as Queue

log = logging.getLogger(__name__)


class Coordinator:
    """A shared stop flag, backed by a `threading.Event`."""

    def __init__(self):
        self._event = threading.Event()

    def request_stop(self):
        """Emit a debug log line and set the stop flag."""
        log.debug('Coordinator stopping')
        self._event.set()

    def should_stop(self):
        """Return True once the stop flag has been set."""
        return self._event.is_set()

    def wait_for_stop(self):
        """Block until the stop flag is set; returns what `Event.wait()` does."""
        return self._event.wait()

    @contextlib.contextmanager
    def stop_on_exception(self):
        """Context manager that converts an exception into a stop request.

        Any `Exception` raised in the `with` body is swallowed. If no stop was
        requested yet, the traceback is printed and a stop is requested; if a
        stop is already in progress the exception is dropped silently.
        """
        try:
            yield
        except Exception:
            if self.should_stop():
                # Already shutting down: do not report follow-up failures
                return
            traceback.print_exc()
            self.request_stop()


def coordinated_get(coordinator, queue):
    """Fetch one element from `queue`, giving up once a stop is requested.

    The queue is polled with a one second timeout so that the coordinator's
    stop flag is re-checked regularly. Raises `Exception` if the coordinator
    reports a stop before an element could be fetched.
    """
    while True:
        if coordinator.should_stop():
            raise Exception('Coordinator stopped during get()')
        try:
            return queue.get(block=True, timeout=1.0)
        except Queue.Empty:
            # Nothing available yet: re-check the stop flag and retry
            pass


def coordinated_put(coordinator, queue, element):
    """Put `element` on `queue`, giving up once a stop is requested.

    The put is retried with a one second timeout so that the coordinator's
    stop flag is re-checked regularly. Raises `Exception` if the coordinator
    reports a stop before the element could be enqueued.
    """
    while True:
        if coordinator.should_stop():
            raise Exception('Coordinator stopped during put()')
        try:
            queue.put(element, block=True, timeout=1.0)
        except Queue.Full:
            # No free slot yet: re-check the stop flag and retry
            continue
        return


================================================
FILE: detectron/utils/cython_bbox.pyx
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------

cimport cython
import numpy as np
cimport numpy as np

DTYPE = np.float32
ctypedef np.float32_t DTYPE_t

@cython.boundscheck(False)
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the overlap (intersection area / union area) between every box in
    `boxes` and every box in `query_boxes`.

    Extents are measured inclusively, i.e. as (col2 - col0 + 1) and
    (col3 - col1 + 1). Pairs that do not intersect keep an overlap of 0.
    NOTE(review): columns are presumably (x1, y1, x2, y2) -- confirm with
    callers.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    # Zero-initialised so that non-intersecting pairs need no explicit write
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    with nogil:
        for k in range(K):
            # Area of the k-th query box, computed once per query box
            box_area = (
                (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                (query_boxes[k, 3] - query_boxes[k, 1] + 1)
            )
            for n in range(N):
                # Extent of the intersection along the first axis
                iw = (
                    min(boxes[n, 2], query_boxes[k, 2]) -
                    max(boxes[n, 0], query_boxes[k, 0]) + 1
                )
                if iw > 0:
                    # Extent of the intersection along the second axis
                    ih = (
                        min(boxes[n, 3], query_boxes[k, 3]) -
                        max(boxes[n, 1], query_boxes[k, 1]) + 1
                    )
                    if ih > 0:
                        # Union area = area(box n) + area(query k) - intersection
                        ua = float(
                            (boxes[n, 2] - boxes[n, 0] + 1) *
                            (boxes[n, 3] - boxes[n, 1] + 1) +
                            box_area - iw * ih
                        )
                        overlaps[n, k] = iw * ih / ua
    return overlaps


================================================
FILE: detectron/utils/cython_nms.pyx
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

cimport cython
import numpy as np
cimport numpy as np

# float32 maximum usable inside `nogil` sections. Returns `a` when a >= b,
# otherwise `b`.
cdef inline np.float32_t max(np.float32_t a, np.float32_t b) nogil:
    return a if a >= b else b

# float32 minimum usable inside `nogil` sections. Returns `a` when a <= b,
# otherwise `b`.
cdef inline np.float32_t min(np.float32_t a, np.float32_t b) nogil:
    return a if a <= b else b

@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh):
    """Greedy non-maximum suppression.

    Boxes are visited in order of decreasing score (column 4). Every box that
    has not been suppressed yet suppresses all lower-scoring boxes whose
    overlap (intersection over union) with it is >= `thresh`.

    Parameters
    ----------
    dets: (N, 5) float32 ndarray; columns 0-3 are the box corners
        (x1, y1, x2, y2) and column 4 is the score. Widths and heights are
        measured inclusively (x2 - x1 + 1, y2 - y1 + 1).
    thresh: overlap threshold at or above which a lower-scoring box is
        suppressed.

    Returns
    -------
    1-D integer ndarray with the row indices of `dets` that were kept, in
    ascending index order (not score order).
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort() always yields np.intp indices, so the buffer is typed as
    # intp_t; this matches on every platform (np.int_t is a C long, which is
    # only 32 bits wide on 64-bit Windows).
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # The `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24;
    # np.intp is available in all NumPy versions.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    with nogil:
        for _i in range(ndets):
            i = order[_i]
            if suppressed[i] == 1:
                continue
            ix1 = x1[i]
            iy1 = y1[i]
            ix2 = x2[i]
            iy2 = y2[i]
            iarea = areas[i]
            # Only boxes that come later in score order can be suppressed by i
            for _j in range(_i + 1, ndets):
                j = order[_j]
                if suppressed[j] == 1:
                    continue
                xx1 = max(ix1, x1[j])
                yy1 = max(iy1, y1[j])
                xx2 = min(ix2, x2[j])
                yy2 = min(iy2, y2[j])
                # Clamp at zero so disjoint boxes get an empty intersection
                w = max(0.0, xx2 - xx1 + 1)
                h = max(0.0, yy2 - yy1 + 1)
                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= thresh:
                    suppressed[j] = 1

    return np.where(suppressed == 0)[0]

# ----------------------------------------------------------
# Soft-NMS: Improving Object Detection With One Line of Code
# Copyright (c) University of Maryland, College Park
# Licensed under The MIT License [see LICENSE for details]
# Written by Navaneeth Bodla and Bharat Singh
# ----------------------------------------------------------
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def soft_nms(
    np.ndarray[float, ndim=2] boxes_in,
    float sigma=0.5,
    float Nt=0.3,
    float threshold=0.001,
    unsigned int method=0
):
    """Soft-NMS: rescore overlapping boxes instead of always removing them.

    Works on a copy of `boxes_in`, so the caller's array is not modified.

    Parameters
    ----------
    boxes_in: 2-D float array; columns 0-3 are read as box corners
        (x1, y1, x2, y2) and column 4 as the score.
    sigma: divisor in the gaussian weight exp(-(ov * ov) / sigma)
        (used when method == 2).
    Nt: overlap threshold used by the linear (method == 1) and the original
        NMS (any other method value) weighting.
    threshold: boxes whose rescored score falls below this value are
        discarded.
    method: 1 selects linear weighting, 2 gaussian weighting, any other
        value the original hard NMS.

    Returns
    -------
    (boxes, inds): the surviving boxes with their updated scores, and the
    row index each of them had in `boxes_in`.
    """
    boxes = boxes_in.copy()
    cdef unsigned int N = boxes.shape[0]
    cdef float iw, ih, box_area
    cdef float ua
    cdef int pos = 0
    cdef float maxscore = 0
    cdef int maxpos = 0
    cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov
    # inds[k] tracks which input row currently sits at position k
    inds = np.arange(N)

    for i in range(N):
        maxscore = boxes[i, 4]
        maxpos = i

        # Remember box i so it can be moved to the max box's slot below
        tx1 = boxes[i,0]
        ty1 = boxes[i,1]
        tx2 = boxes[i,2]
        ty2 = boxes[i,3]
        ts = boxes[i,4]
        ti = inds[i]

        pos = i + 1
        # get max box
        while pos < N:
            if maxscore < boxes[pos, 4]:
                maxscore = boxes[pos, 4]
                maxpos = pos
            pos = pos + 1

        # add max box as a detection
        boxes[i,0] = boxes[maxpos,0]
        boxes[i,1] = boxes[maxpos,1]
        boxes[i,2] = boxes[maxpos,2]
        boxes[i,3] = boxes[maxpos,3]
        boxes[i,4] = boxes[maxpos,4]
        inds[i] = inds[maxpos]

        # swap ith box with position of max box
        boxes[maxpos,0] = tx1
        boxes[maxpos,1] = ty1
        boxes[maxpos,2] = tx2
        boxes[maxpos,3] = ty2
        boxes[maxpos,4] = ts
        inds[maxpos] = ti

        # Reload the (now highest-scoring) box at position i as the reference
        tx1 = boxes[i,0]
        ty1 = boxes[i,1]
        tx2 = boxes[i,2]
        ty2 = boxes[i,3]
        ts = boxes[i,4]

        pos = i + 1
        # NMS iterations, note that N changes if detection boxes fall below
        # threshold
        while pos < N:
            x1 = boxes[pos, 0]
            y1 = boxes[pos, 1]
            x2 = boxes[pos, 2]
            y2 = boxes[pos, 3]
            s = boxes[pos, 4]

            area = (x2 - x1 + 1) * (y2 - y1 + 1)
            iw = (min(tx2, x2) - max(tx1, x1) + 1)
            if iw > 0:
                ih = (min(ty2, y2) - max(ty1, y1) + 1)
                if ih > 0:
                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
                    ov = iw * ih / ua #iou between max box and detection box

                    if method == 1: # linear
                        if ov > Nt:
                            weight = 1 - ov
                        else:
                            weight = 1
                    elif method == 2: # gaussian
                        weight = np.exp(-(ov * ov)/sigma)
                    else: # original NMS
                        if ov > Nt:
                            weight = 0
                        else:
                            weight = 1

                    boxes[pos, 4] = weight*boxes[pos, 4]

                    # if box score falls below threshold, discard the box by
                    # swapping with last box update N
                    if boxes[pos, 4] < threshold:
                        boxes[pos,0] = boxes[N-1, 0]
                        boxes[pos,1] = boxes[N-1, 1]
                        boxes[pos,2] = boxes[N-1, 2]
                        boxes[pos,3] = boxes[N-1, 3]
                        boxes[pos,4] = boxes[N-1, 4]
                        inds[pos] = inds[N-1]
                        N = N - 1
                        # Step back so the box just moved into `pos` is
                        # examined on the next pass
                        pos = pos - 1

            pos = pos + 1

    # Only the first N rows are still valid after discards
    return boxes[:N], inds[:N]


================================================
FILE: detectron/utils/env.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Environment helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os
import sys
import yaml

# Default value of the CMake install prefix
_CMAKE_INSTALL_PREFIX = '/usr/local'
# Detectron ops lib
_DETECTRON_OPS_LIB = 'libcaffe2_detectron_ops_gpu.so'


def get_runtime_dir():
    """Return the runtime directory, taken to be the first `sys.path` entry."""
    runtime_dir = sys.path[0]
    return runtime_dir


def get_py_bin_ext():
    """Return the file extension used for python binaries ('.py')."""
    extension = '.py'
    return extension


def set_up_matplotlib():
    """Select matplotlib's non-interactive 'Agg' backend.

    matplotlib is imported lazily here, so merely importing this module does
    not require it.
    """
    import matplotlib as mpl
    mpl.use('Agg')


def exit_on_error():
    """Terminate a detectron tool with exit status 1 after an error."""
    # Equivalent to sys.exit(1), which works by raising SystemExit
    raise SystemExit(1)


def import_nccl_ops():
    """Import NCCL ops (intentionally a no-op).

    Nothing has to be loaded here because the NCCL dependency is built into
    the Caffe2 gpu lib.
    """


def get_detectron_ops_lib():
    """Find the Detectron ops library and return its path.

    The library file name is looked up under the 'lib' and 'torch/lib'
    subdirectories of, in order: the default CMake install prefix,
    `sys.prefix`, `sys.exec_prefix` and every `sys.path` entry. The first
    existing candidate is printed and returned.

    Raises:
        Exception: if no candidate path exists.
    """
    search_roots = [_CMAKE_INSTALL_PREFIX, sys.prefix, sys.exec_prefix]
    search_roots = search_roots + sys.path
    lib_subdirs = ['lib', 'torch/lib']
    # Lazily enumerate candidates so the search stops at the first hit
    candidates = (
        os.path.join(root, lib_subdir, _DETECTRON_OPS_LIB)
        for root in search_roots
        for lib_subdir in lib_subdirs
    )
    for ops_path in candidates:
        if os.path.exists(ops_path):
            print('Found Detectron ops lib: {}'.format(ops_path))
            return ops_path
    raise Exception('Detectron ops lib not found')


def get_custom_ops_lib():
    """Return the path of the custom ops library.

    The library is expected at 'build/libcaffe2_detectron_custom_ops_gpu.so'
    under the directory two levels above the one containing this file. An
    assertion fails if that path does not exist.
    """
    this_dir = os.path.dirname(__file__)
    # os.path.dirname(p) is the first element of os.path.split(p)
    root_dir = os.path.dirname(os.path.dirname(this_dir))
    custom_ops_lib = os.path.join(
        root_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so')
    assert os.path.exists(custom_ops_lib), \
        'Custom ops lib not found at \'{}\''.format(custom_ops_lib)
    return custom_ops_lib


# YAML load/dump helpers
def yaml_load(stream, Loader=None):
    """Parse a YAML document with an explicit loader.

    Calling `yaml.load` without a `Loader` is deprecated since PyYAML 5.1 and
    raises a TypeError from PyYAML 6 on, so the loader is always passed
    explicitly here.

    Args:
        stream: YAML text or an open file object, as accepted by `yaml.load`.
        Loader: optional PyYAML loader class. Defaults to `yaml.Loader`, the
            full loader that old PyYAML versions used implicitly.

    Returns:
        The Python object produced by `yaml.load`.

    SECURITY NOTE: `yaml.Loader` can construct arbitrary Python objects, so
    only use this on trusted input. Pass `Loader=yaml.SafeLoader` for
    untrusted data.
    """
    if Loader is None:
        Loader = yaml.Loader
    return yaml.load(stream, Loader=Loader)


yaml_dump = yaml.dump


================================================
FILE: detectron/utils/image.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Image helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np


def aspect_ratio_rel(im, aspect_ratio):
    """Resize `im` so its width is scaled by `aspect_ratio`.

    The height is kept; the new width is `aspect_ratio * width`, rounded to
    the nearest integer.
    """
    height, width = im.shape[:2]
    scaled_width = int(round(aspect_ratio * width))
    return cv2.resize(im, dsize=(scaled_width, height))


def aspect_ratio_abs(im, aspect_ratio):
    """Resize `im` to the given width/height ratio, keeping its area.

    The target width and height are chosen so that width / height equals
    `aspect_ratio` and width * height equals the original pixel count; both
    are truncated to integers before resizing.
    """
    height, width = im.shape[:2]
    area = height * width

    target_w = np.sqrt(area * aspect_ratio)
    target_h = np.sqrt(area / aspect_ratio)
    assert np.isclose(target_w / target_h, aspect_ratio)

    new_size = (int(target_w), int(target_h))
    return cv2.resize(im, dsize=new_size)


================================================
FILE: detectron/utils/io.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""IO utilities."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import errno
import hashlib
import logging
import os
import re
import six
import sys
from six.moves import cPickle as pickle
from six.moves import urllib
from uuid import uuid4

logger = logging.getLogger(__name__)

_DETECTRON_S3_BASE_URL = 'https://dl.fbaipublicfiles.com/detectron'


def save_object(obj, file_name, pickle_format=2):
    """Pickle `obj` to `file_name` via a temp file and an atomic rename.

    Pickle protocol 2 is the default so that Python 2 executables can still
    load the result. Once Python 2 backward-compatibility is dropped entirely
    it can be bumped to 3. Avoid pickle.HIGHEST_PROTOCOL whenever the file is
    manifested or used outside of the system.
    """
    target_path = os.path.abspath(file_name)
    # Guard against filesystem races (notably on network filesystems): write
    # to a uniquely named file on the same filesystem first, then rename it
    # onto the target in one step.
    scratch_path = target_path + ".tmp." + uuid4().hex
    try:
        with open(scratch_path, 'wb') as out:
            pickle.dump(obj, out, pickle_format)
            out.flush()  # push buffered data to the OS ...
            os.fsync(out.fileno())  # ... and on to the disk
        os.rename(scratch_path, target_path)
    finally:
        # Remove the temp file if it was left behind by a failure. Deleting
        # unconditionally and ignoring the error is used instead of
        # os.path.exists(), which can be unreliable on network filesystems.
        try:
            os.remove(scratch_path)
        except EnvironmentError as err:  # parent class of IOError, OSError
            # ENOENT is the normal case: the rename already moved the file
            unexpected = getattr(err, 'errno', None) != errno.ENOENT
            if unexpected:
                # Only log; a cleanup problem should not crash the job
                logger.info("Could not delete temp file %r",
                    scratch_path, exc_info=True)


def load_object(file_name):
    """Unpickle and return the object stored in `file_name`.

    NOTE(review): unpickling can execute arbitrary code; only load files from
    trusted sources.
    """
    # Python 3 decodes Python 2 `str` pickles as 7-bit ASCII by default, which
    # fails on blobs holding arbitrary 8-bit bytes. The strictly correct
    # approach would be `encoding="bytes"` followed by decoding the blob names
    # (as ASCII or, better, UTF-8). Decoding as latin1 is a reasonable
    # shortcut: it accepts every byte value and agrees with the first 256
    # Unicode code points.
    extra_kwargs = {} if six.PY2 else {'encoding': 'latin1'}
    with open(file_name, 'rb') as f:
        return pickle.load(f, **extra_kwargs)


def cache_url(url_or_file, cache_dir):
    """Return a local path for `url_or_file`, downloading it if necessary.

    An argument that does not start with http:// or https:// is returned
    unchanged. Otherwise the URL must live under the Detectron S3 base URL;
    the file is downloaded below `cache_dir` (mirroring the URL's path) unless
    it is already there, its md5 hash is verified against the reference, and
    the path of the cached file is returned.
    """
    url_match = re.match(
        r'^(?:http)s?://', url_or_file, re.IGNORECASE
    )
    if url_match is None:
        return url_or_file

    url = url_or_file
    assert url.startswith(_DETECTRON_S3_BASE_URL), \
        ('Detectron only automatically caches URLs in the Detectron S3 '
         'bucket: {}').format(_DETECTRON_S3_BASE_URL)

    cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir)
    if not os.path.exists(cache_file_path):
        cache_file_dir = os.path.dirname(cache_file_path)
        if not os.path.exists(cache_file_dir):
            os.makedirs(cache_file_dir)

        logger.info('Downloading remote file {} to {}'.format(url, cache_file_path))
        download_url(url, cache_file_path)

    # Verified in both cases: freshly downloaded or found in the cache
    assert_cache_file_is_ok(url, cache_file_path)
    return cache_file_path


def assert_cache_file_is_ok(url, file_path):
    """Assert that the md5 hash of `file_path` matches the reference for `url`.

    The local hash is computed first, then the reference hash is fetched. A
    mismatch fails an assertion whose message names both hashes.
    """
    actual_md5sum = _get_file_md5sum(file_path)
    expected_md5sum = _get_reference_md5sum(url)
    assert actual_md5sum == expected_md5sum, \
        ('Target URL {} appears to be downloaded to the local cache file '
         '{}, but the md5 hash of the local file does not match the '
         'reference (actual: {} vs. expected: {}). You may wish to delete '
         'the cached file and try again to trigger automatic '
         'download.').format(url, file_path, actual_md5sum, expected_md5sum)


def _progress_bar(count, total):
    """Draw a one-line download progress bar on stdout.

    The line ends with a carriage return so successive calls overwrite each
    other; a newline is written once `count` reaches `total`.

    Credit:
    https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113
    """
    bar_len = 60
    total_as_float = float(total)
    filled_len = int(round(bar_len * count / total_as_float))
    percents = round(100.0 * count / total_as_float, 1)

    bar = ''.join(['=' * filled_len, '-' * (bar_len - filled_len)])
    line = '  [{}] {}% of {:.1f}MB file  \r'.format(
        bar, percents, total / 1024 / 1024
    )

    sys.stdout.write(line)
    sys.stdout.flush()
    if count >= total:
        sys.stdout.write('\n')


def download_url(
    url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar
):
    """Download url and write it to dst_file_path.

    The response body is streamed in pieces of chunk_size bytes. After each
    piece, progress_hook (if given) is called with
    (bytes_downloaded_so_far, total_size). When the server does not send a
    Content-Length header the total size is unknown and the hook is skipped.
    The HTTP response is always closed, even if reading or writing fails.

    Returns the number of bytes downloaded.
    Credit:
    https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    """
    response = urllib.request.urlopen(url)
    try:
        if six.PY2:
            content_length = response.info().getheader('Content-Length')
        else:
            content_length = response.info().get('Content-Length')
        # The header is optional; calling .strip() on None would crash.
        if content_length is not None:
            total_size = int(content_length.strip())
        else:
            total_size = None
        bytes_so_far = 0

        with open(dst_file_path, 'wb') as f:
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                bytes_so_far += len(chunk)
                if progress_hook and total_size is not None:
                    progress_hook(bytes_so_far, total_size)
                f.write(chunk)
    finally:
        # Release the underlying socket regardless of how the download ended.
        response.close()

    return bytes_so_far


def _get_file_md5sum(file_name):
    """Compute the md5 hash of a file."""
    hash_obj = hashlib.md5()
    with open(file_name, 'rb') as f:
        hash_obj.update(f.read())
    return hash_obj.hexdigest().encode('utf-8')


def _get_reference_md5sum(url):
    """Fetch the reference md5 hash for url.

    By convention the hash is published at url + '.md5sum'; the response
    body is returned with surrounding whitespace stripped.
    """
    response = urllib.request.urlopen(url + '.md5sum')
    return response.read().strip()


================================================
FILE: detectron/utils/keypoints.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Keypoint utilities (somewhat specific to COCO keypoints)."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np

from detectron.core.config import cfg
import detectron.utils.blob as blob_utils


def get_keypoints():
    """Return the COCO keypoint names and their left/right flip map.

    The result is a (keypoints, keypoint_flip_map) tuple: keypoints is the
    ordered list of keypoint names and keypoint_flip_map maps every 'left_*'
    name to its 'right_*' counterpart.
    """
    # The names are hard-coded because the COCO json for the test split does
    # not provide them.
    keypoints = [
        'nose',
        'left_eye', 'right_eye',
        'left_ear', 'right_ear',
        'left_shoulder', 'right_shoulder',
        'left_elbow', 'right_elbow',
        'left_wrist', 'right_wrist',
        'left_hip', 'right_hip',
        'left_knee', 'right_knee',
        'left_ankle', 'right_ankle',
    ]
    left_prefix = 'left_'
    # Every left-side keypoint pairs with the same body part on the right.
    keypoint_flip_map = {
        name: 'right_' + name[len(left_prefix):]
        for name in keypoints
        if name.startswith(left_prefix)
    }
    return keypoints, keypoint_flip_map


def get_person_class_index():
    """Return the index of the person class in COCO."""
    person_class_index = 1
    return person_class_index


def flip_keypoints(keypoints, keypoint_flip_map, keypoint_coords, width):
    """Return a left/right flipped copy of keypoint_coords.

    keypoints (the ordered name list) and keypoint_flip_map are the values
    returned by get_keypoints(). keypoint_coords is indexed as
    [instance, row, keypoint] where row 0 holds x and row 2 holds visibility.
    The input array is not modified.
    """
    mirrored = keypoint_coords.copy()
    for left_name, right_name in keypoint_flip_map.items():
        left_idx = keypoints.index(left_name)
        right_idx = keypoints.index(right_name)
        # Swap the left and right columns, always reading from the input.
        mirrored[:, :, [left_idx, right_idx]] = \
            keypoint_coords[:, :, [right_idx, left_idx]]

    # Mirror the x coordinates across the image width.
    x_coords = mirrored[:, 0, :]
    x_coords[...] = width - x_coords - 1
    # COCO convention: keypoints with visibility == 0 keep x = 0.
    x_coords[mirrored[:, 2, :] == 0] = 0
    return mirrored


def flip_heatmaps(heatmaps):
    """Flip heatmaps horizontally.

    The channels (axis 1) of each left/right keypoint pair are exchanged and
    the last axis is reversed. The input array is not modified.
    """
    names, flip_map = get_keypoints()
    swapped = heatmaps.copy()
    for left_name, right_name in flip_map.items():
        left_idx = names.index(left_name)
        right_idx = names.index(right_name)
        # Exchange the two channels, always reading from the input.
        swapped[:, [right_idx, left_idx], :, :] = \
            heatmaps[:, [left_idx, right_idx], :, :]
    return swapped[:, :, :, ::-1]


def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps. Output has shape
    (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob)
    for each keypoint.

    maps is indexed [roi, keypoint, y, x] (NCHW) and rois holds one
    (x1, y1, x2, y2) box per row. Each roi's heatmaps are resized to the
    roi's size (at least cfg.KRCNN.INFERENCE_MIN_SIZE when that is positive),
    and the location of the maximum response is mapped back to image
    coordinates.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    # Guard against degenerate boxes so the resize target is never empty.
    widths = np.maximum(widths, 1)
    heights = np.maximum(heights, 1)
    widths_ceil = np.ceil(widths)
    heights_ceil = np.ceil(heights)

    # NCHW to NHWC for use with OpenCV
    maps = np.transpose(maps, [0, 2, 3, 1])
    min_size = cfg.KRCNN.INFERENCE_MIN_SIZE
    xy_preds = np.zeros(
        (len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS), dtype=np.float32)
    for i in range(len(rois)):
        if min_size > 0:
            roi_map_width = int(np.maximum(widths_ceil[i], min_size))
            roi_map_height = int(np.maximum(heights_ceil[i], min_size))
        else:
            # cv2.resize requires an integer dsize; np.ceil returns floats.
            roi_map_width = int(widths_ceil[i])
            roi_map_height = int(heights_ceil[i])
        # Ratio between the true roi size and the (integer) resized map size.
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = cv2.resize(
            maps[i], (roi_map_width, roi_map_height),
            interpolation=cv2.INTER_CUBIC)
        # Bring back to CHW
        roi_map = np.transpose(roi_map, [2, 0, 1])
        # scores_to_probs works in place, so hand it a copy to keep the logits.
        roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        for k in range(cfg.KRCNN.NUM_KEYPOINTS):
            # argmax is over the flattened HxW map; recover (y, x) from it.
            pos = roi_map[k, :, :].argmax()
            x_int = pos % w
            y_int = (pos - x_int) // w
            assert (roi_map_probs[k, y_int, x_int] ==
                    roi_map_probs[k, :, :].max())
            x = (x_int + 0.5) * width_correction
            y = (y_int + 0.5) * height_correction
            xy_preds[i, 0, k] = x + offset_x[i]
            xy_preds[i, 1, k] = y + offset_y[i]
            xy_preds[i, 2, k] = roi_map[k, y_int, x_int]
            xy_preds[i, 3, k] = roi_map_probs[k, y_int, x_int]

    return xy_preds


def keypoints_to_heatmap_labels(keypoints, rois):
    """Encode keypoint locations as target heatmap indices for use in
    SoftmaxWithLoss.

    Returns (heatmaps, weights), both of shape (#rois, #keypoints):
    heatmaps holds the linear index y * HEATMAP_SIZE + x of each keypoint
    inside its roi's heatmap (0 where invalid) and weights is 1 for keypoints
    that are visible and fall inside the heatmap, 0 otherwise.
    """
    # Keypoints are mapped from the half-open interval [x1, x2) on continuous
    # image coordinates to the closed interval [0, HEATMAP_SIZE - 1] on
    # discrete image coordinates, using the continuous <-> discrete conversion
    # from Heckbert 1990 ("What is the coordinate of a pixel?"):
    # d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is
    # a continuous coordinate.
    heatmap_size = cfg.KRCNN.HEATMAP_SIZE
    num_keypoints = cfg.KRCNN.NUM_KEYPOINTS
    assert keypoints.shape[2] == num_keypoints

    out_shape = (len(rois), num_keypoints)
    heatmaps = blob_utils.zeros(out_shape)
    weights = blob_utils.zeros(out_shape)

    x1, y1 = rois[:, 0], rois[:, 1]
    x2, y2 = rois[:, 2], rois[:, 3]
    scale_x = heatmap_size / (x2 - x1)
    scale_y = heatmap_size / (y2 - y1)

    for kp in range(keypoints.shape[2]):
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)
        # floor() would push a keypoint lying exactly on the roi's right or
        # bottom boundary out of the heatmap, so remember those and snap them
        # to the last bin afterwards.
        on_right_edge = x == x2
        on_bottom_edge = y == y2

        x = np.floor((x - x1) * scale_x)
        x[on_right_edge] = heatmap_size - 1

        y = np.floor((y - y1) * scale_y)
        y[on_bottom_edge] = heatmap_size - 1

        inside = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
        visible = keypoints[:, 2, kp] > 0
        valid = np.logical_and(inside, visible).astype(np.int32)

        heatmaps[:, kp] = (y * heatmap_size + x) * valid
        weights[:, kp] = valid

    return heatmaps, weights


def scores_to_probs(scores):
    """Transforms CxHxW of scores to probabilities spatially.

    Each channel is replaced, in place, by its softmax over all HxW
    positions; the (modified) input array is returned.
    """
    for c, plane in enumerate(scores):
        # Subtracting the channel maximum keeps exp() from overflowing.
        shifted_exp = np.exp(plane - plane.max())
        scores[c, :, :] = shifted_exp / np.sum(shifted_exp)
    return scores


def nms_oks(kp_predictions, rois, thresh):
    """Greedy non-maximum suppression based on keypoint predictions.

    Instances are ranked by the mean of row 2 of their keypoint predictions.
    The best remaining instance is kept and every other instance whose OKS
    with it exceeds thresh is discarded. Returns the list of kept indices.
    """
    mean_scores = np.mean(kp_predictions[:, 2, :], axis=1)
    remaining = mean_scores.argsort()[::-1]

    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)
        oks = compute_oks(
            kp_predictions[best], rois[best], kp_predictions[rest],
            rois[rest])
        remaining = rest[np.where(oks <= thresh)[0]]

    return keep


def compute_oks(src_keypoints, src_roi, dst_keypoints, dst_roi):
    """Compute the OKS between one source instance and N other instances.
    src_keypoints: 4xK
    src_roi: 4x1
    dst_keypoints: Nx4xK
    dst_roi: Nx4

    Returns an array with one OKS value per dst instance. dst_roi is not
    used by the computation.
    """
    per_keypoint_sigmas = np.array([
        .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87,
        .87, .89, .89]) / 10.0
    variances = (per_keypoint_sigmas * 2)**2

    # Area of the source box (inclusive pixel convention, hence the + 1).
    roi_width = src_roi[2] - src_roi[0] + 1
    roi_height = src_roi[3] - src_roi[1] + 1
    src_area = roi_width * roi_height

    # Per-keypoint squared distance between each dst instance and the source.
    dx = dst_keypoints[:, 0, :] - src_keypoints[0, :]
    dy = dst_keypoints[:, 1, :] - src_keypoints[1, :]
    squared_dist = dx**2 + dy**2

    exponent = squared_dist / variances / (src_area + np.spacing(1)) / 2
    return np.sum(np.exp(-exponent), axis=1) / exponent.shape[1]


================================================
FILE: detectron/utils/logging.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Utilities for logging."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import deque
from email.mime.text import MIMEText
import json
import logging
import numpy as np
import smtplib
import sys


def log_json_stats(stats, sort_keys=True):
    """Print stats as a single 'json_stats: {...}' line.

    Top-level float values are rendered as strings with six decimal places
    (a hack to control their precision); all other values are passed to
    json.dumps unchanged.
    """
    formatted = {}
    for key, value in stats.items():
        if isinstance(value, float):
            formatted[key] = '{:.6f}'.format(value)
        else:
            formatted[key] = value
    serialized = json.dumps(formatted, sort_keys=sort_keys)
    print('json_stats: {:s}'.format(serialized))


class SmoothedValue:
    """Accumulate a stream of values and report statistics either over the
    most recent window_size values or over everything seen so far.
    """

    def __init__(self, window_size):
        # Running totals over the whole series.
        self.count = 0
        self.total = 0.0
        # Complete history of every value added.
        self.series = []
        # Only the most recent window_size values.
        self.deque = deque(maxlen=window_size)

    def AddValue(self, value):
        """Record a new value."""
        self.count += 1
        self.total += value
        self.series.append(value)
        self.deque.append(value)

    def GetMedianValue(self):
        """Median of the values currently in the window."""
        return np.median(self.deque)

    def GetAverageValue(self):
        """Mean of the values currently in the window."""
        return np.mean(self.deque)

    def GetGlobalAverageValue(self):
        """Mean of all values added so far."""
        return self.total / self.count


def send_email(subject, body, to):
    """Send a plain-text email through the SMTP server on localhost.

    The message is sent from the address 'detectron' to the address `to`
    with the given subject and body. The SMTP session is always terminated,
    even when sending fails.
    """
    s = smtplib.SMTP('localhost')
    try:
        mime = MIMEText(body)
        mime['Subject'] = subject
        mime['To'] = to
        s.sendmail('detectron', to, mime.as_string())
    finally:
        # Close the connection instead of leaking the socket.
        s.quit()


def setup_logging(name):
    """Configure root logging to write INFO+ records to stdout and return the
    logger called name.
    """
    log_format = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s'
    # basicConfig() does nothing when the root logger already has handlers
    # (e.g. because some imported module configured logging first), so drop
    # any existing root handlers before configuring.
    logging.root.handlers = []
    logging.basicConfig(
        level=logging.INFO, format=log_format, stream=sys.stdout
    )
    return logging.getLogger(name)


================================================
FILE: detectron/utils/lr_policy.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Learning rate policies."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from detectron.core.config import cfg


def get_lr_at_iter(it):
    """Return the learning rate (as np.float32) for iteration it, as
    configured by cfg.SOLVER.

    During the first cfg.SOLVER.WARM_UP_ITERS iterations the policy's
    learning rate is scaled by a warm up factor chosen by
    cfg.SOLVER.WARM_UP_METHOD ('constant' or 'linear'). Raises KeyError for
    any other warm up method.
    """
    lr = get_lr_func()(it)
    if it >= cfg.SOLVER.WARM_UP_ITERS:
        # Warm up is over: use the policy's learning rate as is.
        return np.float32(lr)

    method = cfg.SOLVER.WARM_UP_METHOD
    if method == 'constant':
        warmup_factor = cfg.SOLVER.WARM_UP_FACTOR
    elif method == 'linear':
        # Interpolate from WARM_UP_FACTOR (at it = 0) towards 1.
        alpha = it / cfg.SOLVER.WARM_UP_ITERS
        warmup_factor = cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha
    else:
        raise KeyError('Unknown SOLVER.WARM_UP_METHOD: {}'.format(method))
    return np.float32(lr * warmup_factor)


# ---------------------------------------------------------------------------- #
# Learning rate policy functions
# ---------------------------------------------------------------------------- #

def lr_func_steps_with_lrs(cur_iter):
    """For cfg.SOLVER.LR_POLICY = 'steps_with_lrs'

    Use an explicitly listed learning rate for each step: the value taken
    from cfg.SOLVER.LRS is the one at the index of the step that cur_iter
    falls into.

    Example:
    cfg.SOLVER.MAX_ITER: 90
    cfg.SOLVER.STEPS:    [0,    60,    80]
    cfg.SOLVER.LRS:      [0.02, 0.002, 0.0002]
    for cur_iter in [0, 59]   use 0.02
                 in [60, 79]  use 0.002
                 in [80, inf] use 0.0002
    """
    step_index = get_step_index(cur_iter)
    lrs = cfg.SOLVER.LRS
    return lrs[step_index]


def lr_func_steps_with_decay(cur_iter):
    """For cfg.SOLVER.LR_POLICY = 'steps_with_decay'

    Decay the learning rate at the specified iterations according to
    lr = base_lr * gamma ** lr_step_count.

    Example:
    cfg.SOLVER.MAX_ITER: 90
    cfg.SOLVER.STEPS:    [0,    60,    80]
    cfg.SOLVER.BASE_LR:  0.02
    cfg.SOLVER.GAMMA:    0.1
    for cur_iter in [0, 59]   use 0.02 = 0.02 * 0.1 ** 0
                 in [60, 79]  use 0.002 = 0.02 * 0.1 ** 1
                 in [80, inf] use 0.0002 = 0.02 * 0.1 ** 2
    """
    lr_step_count = get_step_index(cur_iter)
    solver = cfg.SOLVER
    return solver.BASE_LR * solver.GAMMA ** lr_step_count


def lr_func_step(cur_iter):
    """For cfg.SOLVER.LR_POLICY = 'step'

    Multiply the base learning rate by cfg.SOLVER.GAMMA once for every
    cfg.SOLVER.STEP_SIZE iterations completed.
    """
    num_decays = cur_iter // cfg.SOLVER.STEP_SIZE
    return cfg.SOLVER.BASE_LR * cfg.SOLVER.GAMMA ** num_decays


def lr_func_cosine_decay(cur_iter):
    """For cfg.SOLVER.LR_POLICY = 'cosine_decay'

    Scale the base learning rate by half a cosine period over
    cfg.SOLVER.MAX_ITER iterations: the factor is 1 at iteration 0 and 0 at
    MAX_ITER.
    """
    progress = float(cur_iter) / cfg.SOLVER.MAX_ITER
    cosine_factor = 0.5 * (np.cos(np.pi * progress) + 1)
    return cfg.SOLVER.BASE_LR * cosine_factor


def lr_func_exp_decay(cur_iter):
    """For cfg.SOLVER.LR_POLICY = 'exp_decay'

    Decay the base learning rate exponentially so that the ratio between the
    final (iteration MAX_ITER) and initial learning rate is cfg.SOLVER.GAMMA.
    """
    progress = float(cur_iter) / cfg.SOLVER.MAX_ITER
    log_final_ratio = np.log(cfg.SOLVER.GAMMA)
    decay_factor = np.exp(progress * log_final_ratio)
    return cfg.SOLVER.BASE_LR * decay_factor


# ---------------------------------------------------------------------------- #
# Helpers
# ---------------------------------------------------------------------------- #

def get_step_index(cur_iter):
    """Return the index into cfg.SOLVER.STEPS of the learning rate step that
    cur_iter belongs to.
    """
    assert cfg.SOLVER.STEPS[0] == 0, 'The first step should always start at 0.'
    # MAX_ITER closes the last step's interval.
    boundaries = cfg.SOLVER.STEPS + [cfg.SOLVER.MAX_ITER]
    for position, boundary in enumerate(boundaries):
        if cur_iter < boundary:
            # cur_iter lies in the step that starts just before this boundary.
            return position - 1
    # At or past MAX_ITER: stay on the last step.
    return len(boundaries) - 2


def get_lr_func():
    """Return the module-level function 'lr_func_<cfg.SOLVER.LR_POLICY>'.

    Raises NotImplementedError when no such function is defined.
    """
    func_name = 'lr_func_' + cfg.SOLVER.LR_POLICY
    module_namespace = globals()
    if func_name in module_namespace:
        return module_namespace[func_name]
    raise NotImplementedError(
        'Unknown LR policy: {}'.format(cfg.SOLVER.LR_POLICY))


================================================
FILE: detectron/utils/model_convert_utils.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

'''Helper functions for model conversion to pb'''

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from functools import wraps
import copy
import numpy as np

from caffe2.python import core, workspace
from caffe2.proto import caffe2_pb2


class OpFilter:
    ''' Predicate over operators, built from optional match criteria.

    Supported keyword criteria (each is ignored when left unset/falsy):
      type:       op.type must equal this value
      type_in:    op.type must be contained in this collection
      inputs:     the set of op.input must equal the set of these names
      outputs:    the set of op.output must equal the set of these names
      input_has:  this name must be in op.input
      output_has: this name must be in op.output
      cond:       an extra value that must be truthy (unless None)
      reverse:    invert the result of check()
    '''
    def __init__(self, **kwargs):
        defaults = dict(
            type=None,
            type_in=None,
            inputs=None,
            outputs=None,
            input_has=None,
            output_has=None,
            cond=None,
            reverse=False,
        )
        # Only known criteria may be passed.
        assert all(key in defaults for key in kwargs)
        self.__dict__.update(defaults)
        self.__dict__.update(kwargs)

    def check(self, op):
        ''' Returns `not reverse` if op satisfies every configured criterion,
            `reverse` otherwise '''
        mismatch = (
            (self.type and op.type != self.type) or
            (self.type_in and op.type not in self.type_in) or
            (self.inputs and set(op.input) != set(self.inputs)) or
            (self.outputs and set(op.output) != set(self.outputs)) or
            (self.input_has and self.input_has not in op.input) or
            (self.output_has and self.output_has not in op.output) or
            (self.cond is not None and not self.cond)
        )
        if mismatch:
            return self.reverse
        return not self.reverse


def filter_op(op, **kwargs):
    ''' Returns the result of checking op against an OpFilter built from
        kwargs (true when op passes all checks) '''
    op_checker = OpFilter(**kwargs)
    return op_checker.check(op)


def op_filter(**filter_args):
    ''' Decorator factory: the decorated function is only called for ops that
        pass filter_op(op, **filter_args); for all other ops the wrapper
        returns None '''
    def actual_decorator(f):
        @wraps(f)
        def wrapper(op, **params):
            if filter_op(op, **filter_args):
                return f(op, **params)
            return None
        return wrapper
    return actual_decorator


def op_func_chain(convert_func_list):
    ''' Combine a list of funcs into one that tries them in order and
        returns the first result that is not None (None if there is none) '''
    assert isinstance(convert_func_list, list)

    def _chain(op):
        # Lazy: funcs after the first non-None result are never called.
        results = (convert(op) for convert in convert_func_list)
        return next((res for res in results if res is not None), None)

    return _chain


def convert_op_in_ops(ops_ref, func_or_list):
    ''' Rewrite the ops in ops_ref in place.

    func_or_list is either a single converter or a list of converters (tried
    in order via op_func_chain). For each op the converter may return None
    (keep the op), a single op (replace it) or a list of ops (replace it with
    all of them; an empty list drops the op).
    '''
    if isinstance(func_or_list, list):
        convert = op_func_chain(func_or_list)
    else:
        convert = func_or_list

    converted_ops = []
    # Iterate over a snapshot, since ops_ref itself is rewritten below.
    for op in list(ops_ref):
        replacement = convert(op)
        if replacement is None:
            converted_ops.append(op)
        elif isinstance(replacement, list):
            converted_ops.extend(replacement)
        else:
            converted_ops.append(replacement)

    del ops_ref[:]
    # ops_ref maybe of type RepeatedCompositeFieldContainer
    # which does not have append()
    ops_ref.extend(converted_ops)


def convert_op_in_proto(proto, func_or_list):
    ''' Apply convert_op_in_ops to the ops of proto (proto.op), in place '''
    proto_ops = proto.op
    convert_op_in_ops(proto_ops, func_or_list)


def get_op_arg(op, arg_name):
    ''' Returns the first argument of op whose name is arg_name,
        or None if there is no such argument '''
    matching_args = (arg for arg in op.arg if arg.name == arg_name)
    return next(matching_args, None)


def get_op_arg_valf(op, arg_name, default_val):
    ''' Returns the `f` field of op's argument arg_name, or default_val
        if op has no such argument '''
    arg = get_op_arg(op, arg_name)
    if arg is None:
        return default_val
    return arg.f


def update_mobile_engines(net):
    ''' Set the engine of every Conv op in net to NNPACK and of every
        ConvTranspose op to BLOCK, in place; other ops are left untouched '''
    engine_by_op_type = {
        "Conv": "NNPACK",
        "ConvTranspose": "BLOCK",
    }
    for op in net.op:
        engine = engine_by_op_type.get(op.type)
        if engine is not None:
            op.engine = engine


def pairwise(iterable):
    ''' Returns consecutive overlapping pairs:
        s -> (s0,s1), (s1,s2), (s2, s3), ... '''
    from itertools import tee
    leading, trailing = tee(iterable)
    # Advance the second copy by one so it runs one element ahead.
    next(trailing, None)
    return zip(leading, trailing)


def blob_uses(net, blob):
    ''' Returns the indices (into net.op) of all ops that have blob as an
        input or as a control input '''
    return [
        op_index
        for op_index, op in enumerate(net.op)
        if blob in op.input or blob in op.control_input
    ]


def fuse_first_affine(net, params, removed_tensors):
    ''' Fuse the first fusable (Conv | ConvTranspose) -> AffineChannel pair.

    Works on deep copies of net and params, so the inputs are not modified;
    removed_tensors, however, is appended to in place. At most one pair of
    consecutive ops is fused per call: the AffineChannel's scale and bias are
    folded into the convolution's weight and bias, the two ops are replaced
    by a single fused convolution, and the names of the affine parameters
    that are no longer needed are appended to removed_tensors.

    Returns (net, params, removed_tensors). When nothing could be fused, the
    returned net has the same number of ops as the input net.
    '''
    net = copy.deepcopy(net)
    params = copy.deepcopy(params)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        # The second op must consume the first op's output as its data input
        if next_.input[0] != current.output[0]:
            continue

        if current.type not in ("Conv", "ConvTranspose") \
           or next_.type != "AffineChannel":
            continue
        if current.output[0] != next_.output[0] and \
                len(blob_uses(net, current.output[0])) != 1:
            # Can't fuse if more than one user unless AffineChannel is inplace
            continue

        # else, can fuse
        conv = current
        affine = next_
        fused_conv = copy.deepcopy(conv)
        fused_conv.output[0] = affine.output[0]
        conv_weight = params[conv.input[1]]
        conv_has_bias = len(conv.input) > 2
        conv_bias = params[conv.input[2]] if conv_has_bias else 0

        # A is the AffineChannel scale, B is the AffineChannel bias
        A = params[affine.input[1]]
        B = params[affine.input[2]]

        # The AffineChannel op computes the affine transform
        # X * A + B
        # where (when it stands in for a batch norm layer)
        # A = bn_scale * 1.0 / (sqrt(running_var + eps))
        # B =  (bias - running_mean * (1.0 / sqrt(running_var + eps))
        # * bn_scale)

        # This identity should hold if we have correctly fused
        # np.testing.assert_array_equal(
        #     params[conv.output[0]] * A + B,
        #     params[bn.output[0]])

        # Now, we have that the computation made is the following:
        # ((X `conv` W) + b) * A + B
        # Then, we can simply fuse this as follows:
        # (X `conv` (W * A)) + b * A + B
        # which is simply
        # (X `conv` Q) + C
        # where

        # Q = W * A
        # C = b * A + B

        # For ConvTranspose, from the view of convolutions as a
        # Toeplitz multiplication, we have W_ = W^T, so the weights
        # are laid out as (R, S, K, K) (vs (S, R, K, K) for a Conv),
        # so the weights broadcast slightly differently. Remember, our
        # scale 'A' is of size (S,)

        A_ = A.reshape(-1, 1, 1, 1) if conv.type == "Conv" else \
            A.reshape(1, -1, 1, 1)

        C = conv_bias * A + B
        Q = conv_weight * A_

        assert params[conv.input[1]].shape == Q.shape

        params[conv.input[1]] = Q
        if conv_has_bias:
            assert params[conv.input[2]].shape == C.shape
            params[conv.input[2]] = C
        else:
            # The conv had no bias: reuse the affine bias blob as the bias
            # input of the fused conv (C == B in this case)
            fused_conv.input.append(affine.input[2])
            params[affine.input[2]] = B

        # Replace the (conv, affine) pair at positions i and j by fused_conv
        new_ops = net.op[:i] + [fused_conv] + net.op[j + 1:]
        del net.op[:]
        if conv_has_bias:
            # The affine bias was folded into the conv's own bias blob
            del params[affine.input[2]]
            removed_tensors.append(affine.input[2])
        removed_tensors.append(affine.input[1])
        del params[affine.input[1]]
        net.op.extend(new_ops)
        # Only one fusion per call; the caller re-runs until nothing changes
        break
    return net, params, removed_tensors


def fuse_affine(net, params, ignore_failure):
    ''' Repeatedly fuse Conv/ConvTranspose + AffineChannel pairs in net until
        no more fusion is possible.

    Returns (fused_net, fused_params, removed_tensors), where removed_tensors
    lists the names of the parameter blobs that fusion made obsolete.

    Raises Exception if AffineChannel ops remain after fusion and
    ignore_failure is false.
    '''
    # Run until we hit a fixed point
    removed_tensors = []
    while True:
        next_net, next_params, removed_tensors = \
            fuse_first_affine(net, params, removed_tensors)
        # Every successful fusion removes exactly one op, so an unchanged op
        # count means nothing was fused in this round.
        if len(next_net.op) == len(net.op):
            if (
                not ignore_failure and
                any(op.type == "AffineChannel" for op in next_net.op)
            ):
                # Format the net into the message (it used to be passed as a
                # second, never interpolated, exception argument).
                raise Exception(
                    "Model contains AffineChannel op after fusion: {}".format(
                        next_net))
            return (next_net, next_params, removed_tensors)
        net, params = next_net, next_params


def fuse_net(fuse_func, net, blobs, ignore_failure=False):
    ''' Run fuse_func on net and drop the tensors it removed from the net's
        external inputs.

    net may be a core.Net or a net proto; the returned net has the same kind
    as the one passed in. Returns (net, params).
    '''
    wrap_as_core_net = isinstance(net, core.Net)
    net_proto = net.Proto() if wrap_as_core_net else net

    fused_proto, params, removed_tensors = fuse_func(
        net_proto, blobs, ignore_failure)
    for tensor_name in removed_tensors:
        fused_proto.external_input.remove(tensor_name)

    if wrap_as_core_net:
        return core.Net(fused_proto), params
    return fused_proto, params


def fuse_net_affine(net, blobs):
    ''' Fuse AffineChannel ops in net using fuse_affine; see fuse_net '''
    fused = fuse_net(fuse_affine, net, blobs)
    return fused


def add_tensor(net, name, blob):
    ''' Create an operator to store the tensor 'blob' under 'name' and
        append it to net.op.
        uint8 is stored as an array of string with one element.

    The fill operator type is chosen from blob.dtype; only float32, int32,
    int64 and uint8 are supported (any other dtype raises KeyError).
    '''
    kTypeNameMapper = {
        np.dtype('float32'): "GivenTensorFill",
        np.dtype('int32'): "GivenTensorIntFill",
        np.dtype('int64'): "GivenTensorInt64Fill",
        np.dtype('uint8'): "GivenTensorStringFill",
    }

    shape = blob.shape
    values = blob
    # pass array of uint8 as a string to save storage
    # storing uint8_t has a large overhead for now
    if blob.dtype == np.dtype('uint8'):
        shape = [1]
        # tobytes() yields the raw buffer contents on both Python 2 and 3;
        # str(blob.data) only did so on Python 2 (on Python 3 it returns
        # the memoryview's repr, e.g. '<memory at 0x...>').
        values = [blob.tobytes()]

    op = core.CreateOperator(
        kTypeNameMapper[blob.dtype],
        [], [name],
        shape=shape,
        values=values,
    )
    net.op.extend([op])


def gen_init_net_from_blobs(blobs, blobs_to_use=None, excluded_blobs=None):
    ''' Generate an initialization net based on a blob dict

    blobs maps blob names to values. blobs_to_use selects which names to
    emit (all of them when None) and excluded_blobs, if given, removes names
    from that selection. Blobs whose value is a str are skipped with a
    printed notice.
    '''
    init_net = caffe2_pb2.NetDef()

    if blobs_to_use is None:
        selected = set(blobs)
    else:
        selected = copy.deepcopy(blobs_to_use)
    if excluded_blobs is not None:
        selected = [name for name in selected if name not in excluded_blobs]

    for name in selected:
        blob = blobs[name]
        if not isinstance(blob, str):
            add_tensor(init_net, name, blob)
            continue
        print('Blob {} with type {} is not supported in generating init net,'
              ' skipped.'.format(name, type(blob)))

    return init_net


def get_ws_blobs(blob_names=None):
    ''' Fetch the blobs named in 'blob_names' from the default workspace
        (all workspace blobs if blob_names is None) and return them as a
        dict mapping blob name to value '''
    names = workspace.Blobs() if blob_names is None else blob_names
    return {name: workspace.FetchBlob(name) for name in names}


def get_device_option_cpu():
    ''' Returns a device option for the CPU device type '''
    return core.DeviceOption(caffe2_pb2.CPU)


def get_device_option_cuda(gpu_id=0):
    ''' Returns a device option for the CUDA device with id gpu_id '''
    cuda_option = caffe2_pb2.DeviceOption()
    cuda_option.device_id = gpu_id
    cuda_option.device_type = caffe2_pb2.CUDA
    return cuda_option


def create_input_blobs_for_net(net_def):
    ''' Create, in the workspace, every input blob of the ops in net_def
        that does not exist there yet '''
    for op in net_def.op:
        # Evaluated lazily, so a blob created for an earlier input of the
        # same op is already seen as existing.
        missing_inputs = (
            blob_in for blob_in in op.input
            if not workspace.HasBlob(blob_in)
        )
        for blob_in in missing_inputs:
            workspace.CreateBlob(blob_in)


def compare_model(model1_func, model2_func, test_image, check_blobs):
    ''' Run two models on the same image and check that their blobs match.

    Both model funcs are called as model_func(test_image, check_blobs) and
    must return a dict mapping blob names to arrays.

    check_blobs is either a sequence of blob names (the same name is looked
    up in both results) or a dict mapping a blob name of model1 to the
    corresponding blob name of model2.

    Returns True when all blobs match (to 3 decimals). Raises AssertionError
    if a blob is present in only one result, if shapes differ or if values
    differ. A blob that is missing from both results is skipped.
    '''
    if isinstance(check_blobs, dict):
        # Materialize as lists: dict views are not indexable on Python 3 and
        # keys()/values() of an unmodified dict iterate in matching order.
        names1 = list(check_blobs.keys())
        names2 = list(check_blobs.values())
    else:
        names1 = names2 = check_blobs
    print('Running the first model...')
    res1 = model1_func(test_image, check_blobs)
    print('Running the second model...')
    res2 = model2_func(test_image, check_blobs)
    for n1, n2 in zip(names1, names2):
        print('Checking {} -> {}...'.format(n1, n2))
        r1 = res1[n1] if n1 in res1 else None
        r2 = res2[n2] if n2 in res2 else None
        assert r1 is not None or r2 is None, \
            "Blob {} in model1 is None".format(n1)
        assert r2 is not None or r1 is None, \
            "Blob {} in model2 is None".format(n2)
        if r1 is None:
            # Missing from both models (the asserts above guarantee r2 is
            # None too): there is nothing to compare.
            continue
        assert r1.shape == r2.shape, \
            "Blob {} and {} shape mismatched: {} vs {}".format(
                n1, n2, r1.shape, r2.shape)

        np.testing.assert_array_almost_equal(
            r1, r2, decimal=3,
            err_msg='{} and {} not matched. Max diff: {}'.format(
                n1, n2, np.amax(np.absolute(r1 - r2))))

    return True


# NOTE: graph_name must not contain the word 'graph'
def save_graph(net, file_name, graph_name="net", op_only=True):
    ''' Draw the ops of net with net_drawer and write the graph as a png to
        file_name.

    With op_only set, the minimal graph variant is drawn. Failures while
    writing the image are printed and otherwise ignored (best effort).
    '''
    from caffe2.python import net_drawer
    ops = net.op
    if op_only:
        graph = net_drawer.GetPydotGraphMinimal(
            ops, graph_name,
            rankdir="TB", minimal_dependency=True)
    else:
        graph = net_drawer.GetPydotGraph(
            ops, graph_name,
            rankdir="TB")

    try:
        graph.write_png(file_name)
    except Exception as e:
        print('Error when writing graph to image {}'.format(e))


================================================
FILE: detectron/utils/net.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Helper functions for working with Caffe2 networks (i.e., operator graphs)."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import OrderedDict
import logging
import numpy as np
import os
import pprint

from caffe2.python import core
from caffe2.python import workspace

from detectron.core.config import cfg
from detectron.core.config import load_cfg
from detectron.utils.io import load_object
from detectron.utils.io import save_object
import detectron.utils.c2 as c2_utils
import detectron.utils.env as envu

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def initialize_from_weights_file(model, weights_file, broadcast=True):
    """Load pickled weights into the model on GPU 0 and, unless `broadcast`
    is False, copy the loaded parameters to the remaining GPUs.
    """
    initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    if not broadcast:
        return
    broadcast_parameters(model)


def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    Args:
        model: model whose `params` are fed from the weights file.
        weights_file: path passed to `load_object`. The loaded object is
            used as a dictionary, optionally holding 'cfg' and 'blobs' keys.
        gpu_id: id of the GPU whose name/device scope receives the weights.

    Parameters missing from the weights file are logged and skipped. Blobs in
    the file that the model does not use are kept in the workspace under the
    '__preserve__/' namescope.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    # Snapshot of the blobs present in the workspace before anything is fed
    ws_blobs = workspace.Blobs()
    src_blobs = load_object(weights_file)

    if 'cfg' in src_blobs:
        # The saved config is only used for the bbox regression weights
        # compatibility check
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        # Used as an ordered set: duplicate unscoped names collapse to one key
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0 and
                    unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                # Nothing to load for this parameter; leave it untouched
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            # Momentum is stored next to the parameter as '<name>_momentum'
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.info(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name, src_blobs[src_name]
                    .shape
                )
            )
            if dst_name in ws_blobs:
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name,
                        ws_blob.shape,
                        src_blobs[src_name].shape)
            # Blobs are fed as float32; copy=False avoids a copy when the
            # source array already has that dtype
            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(
                        np.float32, copy=False))

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for src_name in src_blobs.keys():
        # Momentum blobs and None entries are never preserved
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob(
                    '__preserve__/{:s}'.format(src_name), src_blobs[src_name])
                logger.info(
                    '{:s} preserved in workspace (unused)'.format(src_name))


def save_model_to_weights_file(weights_file, model):
    """Collect the model's blobs into a dictionary and pickle it to
    `weights_file` together with a YAML dump of the current config.

    Blob names are stored without their device scope (e.g., 'gpu_0/conv1_w'
    is saved as 'conv1_w'); the first blob seen for an unscoped name wins.
    """
    logger.info(
        'Saving parameters and momentum to {}'.format(
            os.path.abspath(weights_file)))
    blobs = {}

    def _stash(scoped, log_template):
        # Fetch `scoped` from the workspace unless its unscoped name is
        # already stored
        unscoped = c2_utils.UnscopeName(scoped)
        if unscoped in blobs:
            return
        logger.debug(log_template.format(scoped, unscoped))
        blobs[unscoped] = workspace.FetchBlob(scoped)

    # Parameters first ...
    for param in model.params:
        _stash(str(param), ' {:s} -> {:s}')
    # ... then the momentum of every trainable parameter ...
    for param in model.TrainableParams():
        _stash(str(param) + '_momentum', ' {:s} -> {:s}')
    # ... and finally the blobs carried along under '__preserve__/'
    for blob_name in workspace.Blobs():
        if not blob_name.startswith('__preserve__/'):
            continue
        _stash(blob_name, ' {:s} -> {:s} (preserved)')

    cfg_yaml = envu.yaml_dump(cfg)
    save_object(dict(blobs=blobs, cfg=cfg_yaml), weights_file)


def broadcast_parameters(model):
    """Overwrite the parameter (and momentum) blobs on GPUs 1 through
    cfg.NUM_GPUS - 1 with the values of the corresponding blobs on GPU 0.
    """
    if cfg.NUM_GPUS == 1:
        # Nothing to synchronize with a single GPU
        return

    def _replicate(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = len(all_blobs) // cfg.NUM_GPUS
        for offset in range(blobs_per_gpu):
            # Every blobs_per_gpu-th entry is the same blob on the next GPU
            replicas = list(all_blobs[offset::blobs_per_gpu])
            source = replicas[0]
            data = workspace.FetchBlob(source)
            logger.debug('Broadcasting {} to'.format(str(source)))
            for gpu_id, target in enumerate(replicas[1:], 1):
                logger.debug(' |-> {}'.format(str(target)))
                with c2_utils.CudaScope(gpu_id):
                    workspace.FeedBlob(target, data)

    _replicate(model.params)
    _replicate([b + '_momentum' for b in model.TrainableParams()])


def sum_multi_gpu_blob(blob_name):
    """Add up the value of scalar blob `blob_name` over all cfg.NUM_GPUS
    'gpu_<i>/' namescopes and return the total.
    """
    scoped_names = [
        'gpu_{}/{}'.format(gpu_id, blob_name)
        for gpu_id in range(cfg.NUM_GPUS)
    ]
    total = 0
    for scoped_name in scoped_names:
        total += float(workspace.FetchBlob(scoped_name))
    return total


def average_multi_gpu_blob(blob_name):
    """Return the mean over cfg.NUM_GPUS GPUs of scalar blob `blob_name`."""
    total = sum_multi_gpu_blob(blob_name)
    return total / cfg.NUM_GPUS


def print_net(model, namescope='gpu_0'):
    """Log the forward-pass operators of the model network.

    For every op whose first output lies in `namescope`, one line is logged
    per non-parameter input blob that is a numpy array, showing the input
    name and shape, the output name and shape and (on the first line of each
    op) the op type/name.

    Args:
        model: model exposing `net` (op list and name) and `params`.
        namescope: prefix the first output name must start with; pass None
            to print ops regardless of their namescope.
    """
    logger.info('Printing model: {}'.format(model.net.Name()))
    for op in model.net.Proto().op:
        # For simplicity: only print the first output
        # Not recommended if there are split layers
        output_name = str(op.output[0])
        if namescope is not None and not output_name.startswith(namescope):
            continue
        # Only print the forward pass network
        if 'grad' in output_name or '__m' in output_name:
            continue

        try:
            # Under some conditions (e.g., dynamic memory optimization)
            # it is possible that the network frees some blobs when they are
            # no longer needed. Handle this case...
            output_shape = workspace.FetchBlob(output_name).shape
        except Exception:
            # Deliberately not BaseException: KeyboardInterrupt / SystemExit
            # must not be swallowed while probing a blob shape
            output_shape = '<unknown>'

        op_label = op.type + (op.name if op.name == '' else ':' + op.name)
        # The op label is attached to the first printed input only
        suffix = ' ------- (op: {})'.format(op_label)
        first_blob = True
        for input_name in op.input:
            if input_name in model.params:
                continue
            input_blob = workspace.FetchBlob(input_name)
            if not isinstance(input_blob, np.ndarray):
                continue
            logger.info('{:28s}: {:20s} => {:28s}: {:20s}{}'.format(
                c2_utils.UnscopeName(str(input_name)),
                '{}'.format(input_blob.shape),
                c2_utils.UnscopeName(str(output_name)),
                '{}'.format(output_shape),
                suffix))
            if first_blob:
                first_blob = False
                suffix = ' ------|'
    logger.info('End of model: {}'.format(model.net.Name()))


def configure_bbox_reg_weights(model, saved_cfg):
    """Handle weights files saved before the MODEL.BBOX_REG_WEIGHTS config
    key existed (models trained with bounding box regression mean/std
    normalization instead of fixed weights).

    For such files the global config is forced to unit regression weights;
    the model must then be an inference model.
    """
    if 'MODEL' in saved_cfg and 'BBOX_REG_WEIGHTS' in saved_cfg.MODEL:
        # Saved config already carries the key: nothing to patch
        return

    logger.warning('Model from weights file was trained before config key '
                   'MODEL.BBOX_REG_WEIGHTS was added. Forcing '
                   'MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.) to ensure '
                   'correct **inference** behavior.')
    # Modifying the config is normally disallowed; this is a one-off hack
    # for very old models. The previous immutability state is restored.
    was_immutable = cfg.is_immutable()
    cfg.immutable(False)
    cfg.MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.)
    cfg.immutable(was_immutable)
    logger.info('New config:')
    logger.info(pprint.pformat(cfg))
    assert not model.train, (
        'This model was trained with an older version of the code that '
        'used bounding box regression mean/std normalization. It can no '
        'longer be used for training. To upgrade it to a trainable model '
        'please use fb/compat/convert_bbox_reg_normalized_model.py.'
    )


def get_group_gn(dim):
    """
    Return the number of groups GroupNorm should use for `dim` channels.

    Derived from cfg.GROUP_NORM: when DIM_PER_GP is positive the result is
    dim // DIM_PER_GP, otherwise NUM_GROUPS is returned. At least one of the
    two settings must be -1.
    """
    channels_per_group = cfg.GROUP_NORM.DIM_PER_GP
    fixed_groups = cfg.GROUP_NORM.NUM_GROUPS

    assert not (channels_per_group != -1 and fixed_groups != -1), \
        "GroupNorm: can only specify G or C/G."

    if channels_per_group > 0:
        assert dim % channels_per_group == 0
        return dim // channels_per_group

    assert dim % fixed_groups == 0
    return fixed_groups


================================================
FILE: detectron/utils/segms.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Functions for interacting with segmentation masks in the COCO format.

The following terms are used in this module
    mask: a binary mask encoded as a 2D numpy array
    segm: a segmentation mask in one of the two COCO formats (polygon or RLE)
    polygon: COCO's polygon format
    RLE: COCO's run length encoding format
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

import pycocotools.mask as mask_util

# Type used for storing masks in polygon format
_POLY_TYPE = list
# Type used for storing masks in RLE format
_RLE_TYPE = dict


def is_poly(segm):
    """Return True when segm is in polygon format and False when it is RLE.
    Any other type fails the assertion.
    """
    accepted_types = (_POLY_TYPE, _RLE_TYPE)
    assert isinstance(segm, accepted_types), \
        'Invalid segm type: {}'.format(type(segm))
    if isinstance(segm, _POLY_TYPE):
        return True
    return False


def flip_segms(segms, height, width):
    """Left/right flip each mask in a list of masks.

    Args:
        segms: list of segms, each either a polygon segm (list of flat
            [x0, y0, x1, y1, ...] coordinate lists) or an RLE segm (dict).
        height: image height, used to decode uncompressed RLEs.
        width: image width; x coordinates map to `width - x - 1`.

    Returns:
        A new list with one flipped segm per input segm, in the same order.
    """
    def _flip_poly(poly, width):
        # Only the x coordinates (even positions) are mirrored
        flipped_poly = np.array(poly)
        flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
        return flipped_poly.tolist()

    def _flip_rle(rle, height, width):
        if 'counts' in rle and isinstance(rle['counts'], list):
            # Magic RLE format handling painfully discovered by looking at the
            # COCO API showAnns function.
            rle = mask_util.frPyObjects([rle], height, width)
        mask = mask_util.decode(rle)
        # pycocotools decode() returns a 2D (H, W) array for a single RLE
        # dict and a 3D (H, W, N) array for a list of RLEs. Reversing axis 1
        # without naming a third axis mirrors both cases; indexing three
        # axes would raise IndexError on the 2D result.
        mask = mask[:, ::-1]
        # NOTE(review): for 3D input encode() gives back a list of RLEs, so
        # the uncompressed-RLE path returns a one-element list, as it did
        # before this change -- confirm callers expect that.
        rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
        return rle

    flipped_segms = []
    for segm in segms:
        if is_poly(segm):
            # Polygon format
            flipped_segms.append([_flip_poly(poly, width) for poly in segm])
        else:
            # RLE format
            flipped_segms.append(_flip_rle(segm, height, width))
    return flipped_segms


def polys_to_mask(polygons, height, width):
    """Rasterize a COCO polygon segmentation into a binary mask.

    The polygons are taken to lie inside a height x width image, so the
    returned numpy.float32 array has shape (height, width).
    """
    encoded = mask_util.frPyObjects(polygons, height, width)
    per_polygon = np.array(mask_util.decode(encoded), dtype=np.float32)
    # Collapse the per-polygon axis: a pixel is on if any polygon covers it
    coverage = np.sum(per_polygon, axis=2)
    return np.array(coverage > 0, dtype=np.float32)


def mask_to_bbox(mask):
    """Return the tight (x0, y0, x1, y1) box around the positive entries of
    a binary mask as a numpy.float32 array, or None if there are none.
    """
    occupied_cols = np.nonzero(np.sum(mask, axis=0) > 0)[0]
    occupied_rows = np.nonzero(np.sum(mask, axis=1) > 0)[0]

    if len(occupied_cols) == 0 or len(occupied_rows) == 0:
        return None

    corners = (
        occupied_cols[0], occupied_rows[0],
        occupied_cols[-1], occupied_rows[-1],
    )
    return np.array(corners, dtype=np.float32)


def polys_to_mask_wrt_box(polygons, box, M):
    """Rasterize a COCO polygon segmentation relative to a box.

    Polygon coordinates are translated by the box origin and scaled so the
    box (with width and height clamped to at least 1) spans M x M; the
    result is a binary numpy.float32 mask of shape (M, M).
    """
    box_w = np.maximum(box[2] - box[0], 1)
    box_h = np.maximum(box[3] - box[1], 1)

    scaled_polys = []
    for poly in polygons:
        pts = np.array(poly, dtype=np.float32)
        # Even positions hold x coordinates, odd positions hold y
        pts[0::2] = (pts[0::2] - box[0]) * M / box_w
        pts[1::2] = (pts[1::2] - box[1]) * M / box_h
        scaled_polys.append(pts)

    encoded = mask_util.frPyObjects(scaled_polys, M, M)
    per_polygon = np.array(mask_util.decode(encoded), dtype=np.float32)
    # Collapse the per-polygon axis: a pixel is on if any polygon covers it
    coverage = np.sum(per_polygon, axis=2)
    return np.array(coverage > 0, dtype=np.float32)


def polys_to_boxes(polys):
    """Compute one tight (x0, y0, x1, y1) box per entry of `polys`, where
    each entry is a list of flat [x0, y0, x1, y1, ...] polygons. Returns a
    numpy.float32 array of shape (len(polys), 4).
    """
    def _extent(parts, first):
        # Min and max over every second coordinate starting at `first`
        lo = min(min(part[first::2]) for part in parts)
        hi = max(max(part[first::2]) for part in parts)
        return lo, hi

    boxes = np.zeros((len(polys), 4), dtype=np.float32)
    for row, parts in enumerate(polys):
        x_lo, x_hi = _extent(parts, 0)
        y_lo, y_hi = _extent(parts, 1)
        boxes[row, :] = [x_lo, y_lo, x_hi, y_hi]

    return boxes


def rle_mask_voting(
    top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'
):
    """Build one mask per entry of `top_masks` by merging it with the masks
    from `all_masks` whose overlap with it is at least `iou_thresh`.

    `method` selects how the matching masks are merged: 'AVG' takes a
    weighted per-pixel average (weights come from the detection scores in
    `all_dets`, applied inside each detection box) and binarizes it with
    `binarize_thresh`; 'UNION' turns on every pixel set in any matching
    mask. A top mask that is empty or has exactly one match is returned
    unchanged. Nothing is returned when `top_masks` is empty.
    """
    if len(top_masks) == 0:
        return

    not_crowd = [False] * len(all_masks)
    overlaps = mask_util.iou(top_masks, all_masks, not_crowd)

    def _decode_each(rles):
        return [
            np.array(mask_util.decode(rle), dtype=np.float32) for rle in rles
        ]

    pool_masks = _decode_each(all_masks)
    top_decoded = _decode_each(top_masks)
    det_boxes = all_dets[:, :4].astype(np.int32)
    det_scores = all_dets[:, 4]

    # Per-mask weight maps: the detection score inside the (clipped) box and
    # a small floor value everywhere else
    shape = pool_masks[0].shape
    weight_maps = np.zeros((len(all_masks), shape[0], shape[1]))
    for idx in range(len(all_masks)):
        box = det_boxes[idx]
        left = max(box[0], 0)
        right = min(box[2] + 1, shape[1])
        top = max(box[1], 0)
        bottom = min(box[3] + 1, shape[0])
        weight_maps[idx, top:bottom, left:right] = det_scores[idx]
    weight_maps = np.maximum(weight_maps, 1e-5)

    voted_segms = []
    for idx in range(len(top_masks)):
        # An empty mask is passed through untouched
        if top_decoded[idx].sum() == 0:
            voted_segms.append(top_masks[idx])
            continue

        voters = np.where(overlaps[idx] >= iou_thresh)[0]
        # A single match means there is nothing to merge with
        if len(voters) == 1:
            voted_segms.append(top_masks[idx])
            continue

        voting_masks = [pool_masks[v] for v in voters]
        if method == 'AVG':
            averaged = np.average(
                voting_masks, axis=0, weights=weight_maps[voters])
            merged = np.array(averaged > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # A pixel set in any of the masks ends up in the result
            summed = np.sum(voting_masks, axis=0)
            merged = np.array(summed > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))
        encoded = mask_util.encode(
            np.array(merged[:, :, np.newaxis], order='F'))[0]
        voted_segms.append(encoded)

    return voted_segms


def rle_mask_nms(masks, dets, thresh, mode='IOU'):
    """Greedy non-maximum suppression driven by mask overlap.

    Masks are visited in order of decreasing score (column 4 of `dets`);
    each kept mask suppresses the remaining ones whose overlap with it
    exceeds `thresh`. `mode` selects the overlap measure: 'IOU'
    (intersection over union), 'IOMA' (intersection over minimum area) or
    'CONTAINMENT' (intersection over the area of the second mask). Returns
    the list of kept indices.
    """
    num_masks = len(masks)
    if num_masks == 0:
        return []
    if num_masks == 1:
        return [0]

    if mode not in ('IOU', 'IOMA', 'CONTAINMENT'):
        raise NotImplementedError('Mode {} is unknown'.format(mode))

    # With the crowd flag set, the overlap is normalized by the area of the
    # second mask: ious[m1, m2] = area(intersect(m1, m2)) / area(m2).
    # Without it: ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2))
    as_crowd = mode != 'IOU'
    ious = mask_util.iou(masks, masks, [as_crowd] * num_masks)
    if mode == 'IOMA':
        # max(inter / area(m2), inter / area(m1)) == inter / min(area)
        ious = np.maximum(ious, ious.transpose())

    scores = dets[:, 4]
    remaining = np.argsort(-scores)

    keep = []
    while remaining.size > 0:
        best = remaining[0]
        keep.append(best)
        candidates = remaining[1:]
        survivors = np.where(ious[best, candidates] <= thresh)[0]
        remaining = candidates[survivors]

    return keep


def rle_masks_to_boxes(masks):
    """Compute the bounding box of every mask in a list of RLE masks.

    Returns a pair (boxes, keep): `boxes` has one (x0, y0, x1, y1) row per
    mask (all zeros for empty masks) and `keep` holds the indices of the
    non-empty masks. An empty input list yields an empty list.
    """
    if len(masks) == 0:
        return []

    decoded = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks
    ]

    boxes = np.zeros((len(decoded), 4))
    has_pixels = []
    for row, mask in enumerate(decoded):
        nonempty = not (mask.sum() == 0)
        has_pixels.append(nonempty)
        if not nonempty:
            continue
        # Columns / rows containing at least one positive pixel
        cols = np.where(mask.sum(axis=0) > 0)[0]
        rows = np.where(mask.sum(axis=1) > 0)[0]
        boxes[row, :] = (cols.min(), rows.min(), cols.max(), rows.max())

    return boxes, np.where(has_pixels)[0]


================================================
FILE: detectron/utils/subprocess.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Primitives for running multiple single-GPU jobs in parallel over subranges of
data. These are used for running multi-GPU inference. Subprocesses are used to
avoid the GIL since inference may involve non-trivial amounts of Python code.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os
import numpy as np
import subprocess
from six.moves import shlex_quote

from detectron.core.config import cfg
from detectron.utils.io import load_object
import detectron.utils.env as envu

import logging
logger = logging.getLogger(__name__)


def process_in_parallel(
    tag, total_range_size, binary, output_dir, opts=''
):
    """Run the specified binary cfg.NUM_GPUS times in parallel, each time as a
    subprocess that uses one GPU. The binary must accept the command line
    arguments `--range {start} {end}` that specify a data processing range.

    Args:
        tag: prefix for the config, stdout and result file names written to
            `output_dir`.
        total_range_size: size of the index range [0, total_range_size) that
            is split across the subprocesses.
        binary: path of the executable to launch.
        output_dir: directory holding the config snapshot, the per-range
            stdout logs and the per-range '.pkl' result files.
        opts: iterable of extra command line tokens appended (shell-quoted)
            to every command.

    Returns:
        A list with the object loaded from each subprocess's
        '<tag>_range_<start>_<end>.pkl' file, in launch order.
    """
    # Snapshot the current cfg state in order to pass to the inference
    # subprocesses
    cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag))
    with open(cfg_file, 'w') as f:
        envu.yaml_dump(cfg, stream=f)
    subprocess_env = os.environ.copy()
    processes = []
    subinds = np.array_split(range(total_range_size), cfg.NUM_GPUS)
    # Determine GPUs to use
    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if cuda_visible_devices:
        # Build a real list: on Python 3 map() returns a one-shot iterator
        # that the membership test below would exhaust, leaving nothing for
        # the launch loop to iterate over.
        gpu_inds = [int(ind) for ind in cuda_visible_devices.split(',')]
        assert -1 not in gpu_inds, \
            'Hiding GPU indices using the \'-1\' index is not supported'
    else:
        gpu_inds = range(cfg.NUM_GPUS)
    # NOTE(review): subinds has cfg.NUM_GPUS entries; this presumably expects
    # CUDA_VISIBLE_DEVICES to list exactly that many devices -- confirm.
    # Run the binary in cfg.NUM_GPUS subprocesses
    for i, gpu_ind in enumerate(gpu_inds):
        start = subinds[i][0]
        end = subinds[i][-1] + 1
        # Each subprocess sees exactly one GPU
        subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind)
        cmd = '{binary} --range {start} {end} --cfg {cfg_file} NUM_GPUS 1 {opts}'
        cmd = cmd.format(
            binary=shlex_quote(binary),
            start=int(start),
            end=int(end),
            cfg_file=shlex_quote(cfg_file),
            opts=' '.join([shlex_quote(opt) for opt in opts])
        )
        logger.info('{} range command {}: {}'.format(tag, i, cmd))
        if i == 0:
            # The first subprocess is piped so its output can be streamed
            subprocess_stdout = subprocess.PIPE
        else:
            filename = os.path.join(
                output_dir, '%s_range_%s_%s.stdout' % (tag, start, end)
            )
            subprocess_stdout = open(filename, 'w')  # NOQA (close below)
        p = subprocess.Popen(
            cmd,
            shell=True,
            env=subprocess_env,
            stdout=subprocess_stdout,
            stderr=subprocess.STDOUT,
            bufsize=1
        )
        processes.append((i, p, start, end, subprocess_stdout))
    # Log output from inference processes and collate their results
    outputs = []
    for i, p, start, end, subprocess_stdout in processes:
        log_subprocess_output(i, p, output_dir, tag, start, end)
        if i > 0:
            subprocess_stdout.close()
        range_file = os.path.join(
            output_dir, '%s_range_%s_%s.pkl' % (tag, start, end)
        )
        range_data = load_object(range_file)
        outputs.append(range_data)
    return outputs


def log_subprocess_output(i, p, output_dir, tag, start, end):
    """Wait for subprocess `p` and echo its output in the parent process.

    Subprocess 0 is read from its stdout pipe line by line, printed as it
    arrives and copied to the range's '.stdout' file. Every other subprocess
    is waited for first and its '.stdout' file is then printed in one go.
    A non-zero exit code fails the assertion.
    """
    stdout_name = '%s_range_%s_%s.stdout' % (tag, start, end)
    outfile = os.path.join(output_dir, stdout_name)
    banner = '# ' + '-' * 76 + ' #'
    logger.info(banner)
    logger.info(
        'stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end)
    )
    logger.info(banner)
    if i != 0:
        # Output was redirected to the log file; dump it once finished
        ret = p.wait()
        with open(outfile, 'r') as log:
            print(''.join(log.readlines()))
    else:
        # Relay the pipe until the subprocess closes it
        with open(outfile, 'wb') as log:
            while True:
                line = p.stdout.readline()
                if line == b'':
                    break
                print(line.rstrip().decode("utf8"))
                log.write(line)
        p.stdout.close()
        ret = p.wait()
    assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret)


================================================
FILE: detectron/utils/timer.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Timing related functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import time


class Timer:
    """Accumulating wall-clock stopwatch.

    Call tic() to start an interval and toc() to end it; the timer tracks
    the last interval (`diff`), the running total (`total_time`), the number
    of completed intervals (`calls`) and their mean (`average_time`).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every statistic and the start timestamp."""
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Record the start of an interval."""
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """End the current interval and update the statistics.

        Returns the mean interval length when `average` is true, otherwise
        the length of the interval that just ended.
        """
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time += elapsed
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff


================================================
FILE: detectron/utils/train.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Utilities driving the train_net binary"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from shutil import copyfile
import cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)
import logging
import numpy as np
import os
import re

from caffe2.python import memonger
from caffe2.python import workspace

from detectron.core.config import cfg
from detectron.core.config import get_output_dir
from detectron.datasets.roidb import combined_roidb_for_training
from detectron.modeling import model_builder
from detectron.utils import lr_policy
from detectron.utils.training_stats import TrainingStats
import detectron.utils.env as envu
import detectron.utils.net as nu


def train_model():
    """Run the training loop and return the dictionary of checkpoints.

    Checkpoints map an iteration number (or 'final') to the path of the
    saved weights file. If a final model already exists, the checkpoints
    found by create_model() are returned without training.
    """
    model, weights_file, start_iter, checkpoints, output_dir = create_model()
    if 'final' in checkpoints:
        # A final model is already present in the output directory
        return checkpoints

    setup_model_for_training(model, weights_file, output_dir)
    training_stats = TrainingStats(model)
    snapshot_period = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)

    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        if model.roi_data_loader.has_stopped():
            handle_critical_error(model, 'roi_data_loader failed')
        training_stats.IterTic()
        scheduled_lr = lr_policy.get_lr_at_iter(cur_iter)
        lr = model.UpdateWorkspaceLr(cur_iter, scheduled_lr)
        workspace.RunNet(model.net.Proto().name)
        if cur_iter == start_iter:
            # Print the network once, after its first run
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats()
        training_stats.LogIterStats(cur_iter, lr)

        at_snapshot = (cur_iter + 1) % snapshot_period == 0
        if at_snapshot and cur_iter > start_iter:
            snapshot_path = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter)
            )
            checkpoints[cur_iter] = snapshot_path
            nu.save_model_to_weights_file(snapshot_path, model)

        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Drop the timings of the first few SGD iterations, which are
            # outliers, from the iteration timer
            training_stats.ResetIterTimer()

        if np.isnan(training_stats.iter_total_loss):
            handle_critical_error(model, 'Loss is NaN')

    # Training is done: write the final model ...
    final_path = os.path.join(output_dir, 'model_final.pkl')
    checkpoints['final'] = final_path
    nu.save_model_to_weights_file(final_path, model)
    # ... and stop the data loading threads
    model.roi_data_loader.shutdown()
    return checkpoints


def handle_critical_error(model, msg):
    """Report an unrecoverable training failure and abort.

    Logs `msg` at CRITICAL level, shuts down the model's roi data loader and
    then raises.

    Raises:
        Exception: always, carrying `msg`.
    """
    logging.getLogger(__name__).critical(msg)
    data_loader = model.roi_data_loader
    data_loader.shutdown()
    raise Exception(msg)


def create_model():
    """Build the model and look for saved model checkpoints in case we can
    resume from one.

    When cfg.TRAIN.AUTO_RESUME is set, the output directory is inspected:
    an existing model_final.pkl short-circuits training, and otherwise the
    checkpoint file model_iter<N>.pkl with the highest N (if any) replaces the
    configured initialization weights.

    Returns:
        A tuple (model, weights_file, start_iter, checkpoints, output_dir).
        If the final model already exists, the tuple is
        (None, None, None, {'final': <path>}, output_dir) instead.
    """
    logger = logging.getLogger(__name__)
    start_iter = 0
    checkpoints = {}
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    weights_file = cfg.TRAIN.WEIGHTS
    if cfg.TRAIN.AUTO_RESUME:
        # Check for the final model (indicates training already finished)
        final_path = os.path.join(output_dir, 'model_final.pkl')
        if os.path.exists(final_path):
            logger.info('model_final.pkl exists; no need to train!')
            return None, None, None, {'final': final_path}, output_dir

        if cfg.TRAIN.COPY_WEIGHTS:
            copyfile(
                weights_file,
                os.path.join(output_dir, os.path.basename(weights_file)))
            logger.info('Copy {} to {}'.format(weights_file, output_dir))

        # Find the most recent checkpoint (highest iteration number). The
        # best iteration seen so far is tracked separately from start_iter
        # (which is checkpoint + 1); comparing against start_iter would skip
        # a checkpoint exactly one iteration newer than the current best,
        # depending on the directory listing order.
        latest_checkpoint_iter = -1
        resume_weights_file = None
        for f in os.listdir(output_dir):
            match = re.search(r'(?<=model_iter)\d+(?=\.pkl)', f)
            if match is None:
                continue
            checkpoint_iter = int(match.group(0))
            if checkpoint_iter > latest_checkpoint_iter:
                latest_checkpoint_iter = checkpoint_iter
                resume_weights_file = f

        if resume_weights_file is not None:
            # Start one iteration immediately after the checkpoint iter
            start_iter = latest_checkpoint_iter + 1
            # Override the initialization weights with the found checkpoint
            weights_file = os.path.join(output_dir, resume_weights_file)
            logger.info(
                '========> Resuming from checkpoint {} at start iter {}'.
                format(weights_file, start_iter)
            )

    logger.info('Building model: {}'.format(cfg.MODEL.TYPE))
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Performs random weight initialization as defined by the model
    workspace.RunNetOnce(model.param_init_net)
    return model, weights_file, start_iter, checkpoints, output_dir


def optimize_memory(model):
    """Apply memonger gradient-blob sharing to the net, one GPU scope at a
    time, to reduce GPU memory use."""
    for gpu_id in range(cfg.NUM_GPUS):
        scope = 'gpu_{}/'.format(gpu_id)
        # Loss blob names qualified with this GPU's name scope
        scoped_losses = []
        for loss_name in model.losses:
            scoped_losses.append(scope + loss_name)
        grad_blobs = set(model.param_to_grad.values())
        model.net._net = memonger.share_grad_blobs(
            model.net,
            scoped_losses,
            grad_blobs,
            scope,
            share_activations=cfg.MEMONGER_SHARE_ACTIVATIONS
        )


def setup_model_for_training(model, weights_file, output_dir):
    """Load saved weights (if any), create the network in the C2 workspace
    and start the data loader.

    Returns:
        The `output_dir` argument, unchanged.
    """
    log = logging.getLogger(__name__)
    add_model_training_inputs(model)

    # A weights file, when given, replaces the random initialization on GPU 0
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Parameters are synchronized across GPUs in either case
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    abs_output_dir = os.path.abspath(output_dir)
    log.info('Outputs saved to: {:s}'.format(abs_output_dir))
    dump_proto_files(model, output_dir)

    # Begin loading mini-batches and enqueuing blobs
    loader = model.roi_data_loader
    loader.register_sigint_handler()
    loader.start(prefill=True)
    return output_dir


def add_model_training_inputs(model):
    """Build the training roidb from the configured datasets and attach it to
    the model as its training input."""
    log = logging.getLogger(__name__)
    dataset_names = cfg.TRAIN.DATASETS
    log.info('Loading dataset: {}'.format(dataset_names))
    roidb = combined_roidb_for_training(
        dataset_names, cfg.TRAIN.PROPOSAL_FILES
    )
    num_entries = len(roidb)
    log.info('{:d} roidb entries'.format(num_entries))
    model_builder.add_training_inputs(model, roidb=roidb)


def dump_proto_files(model, output_dir):
    """Write text dumps of the training net and the parameter initialization
    net into `output_dir` as net.pbtxt and param_init_net.pbtxt."""
    targets = (
        ('net.pbtxt', model.net),
        ('param_init_net.pbtxt', model.param_init_net),
    )
    for filename, net in targets:
        with open(os.path.join(output_dir, filename), 'w') as out_file:
            out_file.write(str(net.Proto()))


================================================
FILE: detectron/utils/training_stats.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Utilities for training."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import datetime
import numpy as np

from caffe2.python import utils as c2_py_utils

from detectron.core.config import cfg
from detectron.utils.logging import log_json_stats
from detectron.utils.logging import SmoothedValue
from detectron.utils.timer import Timer
import detectron.utils.net as nu


class TrainingStats:
    """Collect per-iteration training statistics and log them periodically."""

    def __init__(self, model):
        # Smoothing window size handed to every SmoothedValue tracker
        self.WIN_SZ = 20
        # Stats are logged once every LOG_PERIOD SGD iterations
        self.LOG_PERIOD = 20
        tracked_names = model.losses + model.metrics
        self.smoothed_losses_and_metrics = dict(
            (name, SmoothedValue(self.WIN_SZ)) for name in tracked_names
        )
        # Latest raw (unsmoothed) value of every tracked loss / metric
        self.losses_and_metrics = dict.fromkeys(tracked_names, 0)
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        # NaN until the first call to UpdateIterStats
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        """Start timing an iteration."""
        self.iter_timer.tic()

    def IterToc(self):
        """Stop timing the iteration and return the timer's toc() result."""
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        """Reset the iteration timer."""
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Refresh the tracked values from the current workspace state.

        Losses are summed across GPUs, metrics are averaged; the smoothed
        trackers, the total loss and the minibatch queue size are updated.
        """
        loss_names = self.model.losses
        for name in self.losses_and_metrics.keys():
            if name in loss_names:
                fetch = nu.sum_multi_gpu_blob
            else:
                fetch = nu.average_multi_gpu_blob
            self.losses_and_metrics[name] = fetch(name)
        for name, tracker in self.smoothed_losses_and_metrics.items():
            tracker.AddValue(self.losses_and_metrics[name])
        per_loss_values = [self.losses_and_metrics[name] for name in loss_names]
        self.iter_total_loss = np.sum(np.array(per_loss_values))
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        minibatch_queue = self.model.roi_data_loader._minibatch_queue
        self.smoothed_mb_qsize.AddValue(minibatch_queue.qsize())

    def LogIterStats(self, cur_iter, lr):
        """Log the stats on every LOG_PERIOD-th iteration and on the last
        iteration (cfg.SOLVER.MAX_ITER - 1)."""
        if (cur_iter % self.LOG_PERIOD != 0 and
                cur_iter != cfg.SOLVER.MAX_ITER - 1):
            return
        log_json_stats(self.GetStats(cur_iter, lr))

    def GetStats(self, cur_iter, lr):
        """Return a dict with the current smoothed statistics.

        Contains iter, lr, time, loss, eta, mb_qsize and mem, plus one entry
        per tracked loss / metric.
        """
        avg_iter_time = self.iter_timer.average_time
        remaining_iters = cfg.SOLVER.MAX_ITER - cur_iter
        eta_seconds = avg_iter_time * remaining_iters
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))

        gpu_mem = c2_py_utils.GetGPUMemoryUsageStats()
        # Largest 'max_by_gpu' entry among the first cfg.NUM_GPUS devices
        peak_mem = np.max(gpu_mem['max_by_gpu'][:cfg.NUM_GPUS])
        median_qsize = self.smoothed_mb_qsize.GetMedianValue()

        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=avg_iter_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            mb_qsize=int(np.round(median_qsize)),
            mem=int(np.ceil(peak_mem / 1024 / 1024))
        )
        for name, tracker in self.smoothed_losses_and_metrics.items():
            stats[name] = tracker.GetMedianValue()
        return stats


================================================
FILE: detectron/utils/vis.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Detection output visualization module."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np
import os

import pycocotools.mask as mask_util

from detectron.utils.colormap import colormap
import detectron.utils.env as envu
import detectron.utils.keypoints as keypoint_utils

# Matplotlib requires certain adjustments in some environments
# Must happen before importing matplotlib
envu.set_up_matplotlib()
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

plt.rcParams['pdf.fonttype'] = 42  # For editing in Adobe Illustrator


# Color tuples passed to the OpenCV drawing calls in this module
# (presumably in OpenCV's BGR channel order -- TODO confirm).
_GRAY = (218, 227, 218)  # label text color in vis_class
_GREEN = (18, 127, 15)  # box outline and label background color
_WHITE = (255, 255, 255)  # mask border color in vis_mask


def kp_connections(keypoints):
    """Return the skeleton segments as pairs of keypoint indices.

    `keypoints` is the list of keypoint names; each returned element is a
    two-item list holding the positions of the segment's end points in that
    list.
    """
    segment_names = (
        ('left_eye', 'right_eye'),
        ('left_eye', 'nose'),
        ('right_eye', 'nose'),
        ('right_eye', 'right_ear'),
        ('left_eye', 'left_ear'),
        ('right_shoulder', 'right_elbow'),
        ('right_elbow', 'right_wrist'),
        ('left_shoulder', 'left_elbow'),
        ('left_elbow', 'left_wrist'),
        ('right_hip', 'right_knee'),
        ('right_knee', 'right_ankle'),
        ('left_hip', 'left_knee'),
        ('left_knee', 'left_ankle'),
        ('right_shoulder', 'left_shoulder'),
        ('right_hip', 'left_hip'),
    )
    return [
        [keypoints.index(start), keypoints.index(end)]
        for start, end in segment_names
    ]


def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps):
    """Flatten the per-class boxes/segms/keyps lists produced by the testing
    code.

    Returns:
        (boxes, segms, keyps, classes): `boxes` is the concatenation of all
        non-empty per-class box arrays (None if there are none), `segms` and
        `keyps` are flat lists (None when the corresponding input is None),
        and `classes` holds one class index per box.
    """
    nonempty_boxes = [dets for dets in cls_boxes if len(dets) > 0]
    boxes = np.concatenate(nonempty_boxes) if len(nonempty_boxes) > 0 else None

    segms = None
    if cls_segms is not None:
        segms = []
        for class_segms in cls_segms:
            segms.extend(class_segms)

    keyps = None
    if cls_keyps is not None:
        keyps = []
        for class_keyps in cls_keyps:
            keyps.extend(class_keyps)

    # Repeat each class index once per detection of that class
    classes = [
        cls_id
        for cls_id, dets in enumerate(cls_boxes)
        for _ in range(len(dets))
    ]
    return boxes, segms, keyps, classes


def get_class_string(class_index, score, dataset):
    """Build the label text for a detection: class name followed by score.

    Args:
        class_index: index of the detected class.
        score: detection score, formatted with two decimals.
        dataset: object whose `classes` sequence maps indices to names, or
            None, in which case the label uses 'id<class_index>'.

    Returns:
        A string such as 'person .98' (the leading zero of the score is
        dropped).
    """
    if dataset is not None:
        class_text = dataset.classes[class_index]
    else:
        class_text = 'id{:d}'.format(class_index)
    # Strip the leading zero from the score itself; stripping after the
    # separating space has been prepended would never remove anything.
    score_text = '{:0.2f}'.format(score).lstrip('0')
    return class_text + ' ' + score_text


def vis_mask(img, mask, col, alpha=0.4, show_border=True, border_thick=1):
    """Blend a single binary mask into an image.

    Pixels where `mask` is non-zero are mixed with color `col` using weight
    `alpha`; optionally the mask contours are drawn on top. The result is
    returned as a uint8 image.
    """
    blended = img.astype(np.float32)
    nonzero = np.nonzero(mask)
    rows, cols = nonzero[0], nonzero[1]

    blended[rows, cols, :] *= 1.0 - alpha
    blended[rows, cols, :] += alpha * col

    if not show_border:
        return blended.astype(np.uint8)

    found = cv2.findContours(
        mask.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    # Index from the end so the contour list is picked whether findContours
    # returns two or three values
    cv2.drawContours(blended, found[-2], -1, _WHITE, border_thick, cv2.LINE_AA)
    return blended.astype(np.uint8)


def vis_class(img, pos, class_str, font_scale=0.35):
    """Draw a class label on a filled background rectangle anchored at `pos`
    and return the uint8 image."""
    canvas = img.astype(np.uint8)
    left = int(pos[0])
    bottom = int(pos[1])
    face = cv2.FONT_HERSHEY_SIMPLEX
    # Measure the rendered text so the background can be sized to it
    (text_w, text_h), _ = cv2.getTextSize(class_str, face, font_scale, 1)
    # Filled background behind the text
    box_top_left = (left, bottom - int(1.3 * text_h))
    box_bottom_right = (left + text_w, bottom)
    cv2.rectangle(canvas, box_top_left, box_bottom_right, _GREEN, -1)
    # The text itself
    text_origin = (left, bottom - int(0.3 * text_h))
    cv2.putText(
        canvas, class_str, text_origin, face, font_scale, _GRAY,
        lineType=cv2.LINE_AA)
    return canvas


def vis_bbox(img, bbox, thick=1):
    """Draw a bounding box given as (x, y, width, height) and return the
    uint8 image."""
    canvas = img.astype(np.uint8)
    left, top, width, height = bbox
    bottom_right = (int(left + width), int(top + height))
    top_left = (int(left), int(top))
    cv2.rectangle(canvas, top_left, bottom_right, _GREEN, thickness=thick)
    return canvas


def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):
    """Visualizes keypoints (adapted from vis_one_image).
    kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob).

    A keypoint (or a segment between two keypoints) is drawn only when the
    value in row 2 of `kps` exceeds `kp_thresh`. Coordinates are truncated to
    Python ints before being handed to OpenCV, matching vis_bbox / vis_class,
    because the drawing functions expect integer pixel coordinates.

    Returns:
        The input image blended with the keypoint drawing using weight
        `alpha`.
    """
    dataset_keypoints, _ = keypoint_utils.get_keypoints()
    kp_lines = kp_connections(dataset_keypoints)

    def _to_point(x, y):
        # OpenCV drawing calls need a tuple of plain integers
        return int(x), int(y)

    # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
    colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]

    # Perform the drawing on a copy of the image, to allow for blending.
    kp_mask = np.copy(img)

    # Draw mid shoulder / mid hip first for better visualization.
    mid_shoulder = (
        kps[:2, dataset_keypoints.index('right_shoulder')] +
        kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
    sc_mid_shoulder = np.minimum(
        kps[2, dataset_keypoints.index('right_shoulder')],
        kps[2, dataset_keypoints.index('left_shoulder')])
    mid_hip = (
        kps[:2, dataset_keypoints.index('right_hip')] +
        kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
    sc_mid_hip = np.minimum(
        kps[2, dataset_keypoints.index('right_hip')],
        kps[2, dataset_keypoints.index('left_hip')])
    nose_idx = dataset_keypoints.index('nose')
    if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:
        cv2.line(
            kp_mask,
            _to_point(mid_shoulder[0], mid_shoulder[1]),
            _to_point(kps[0, nose_idx], kps[1, nose_idx]),
            color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)
    if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
        cv2.line(
            kp_mask,
            _to_point(mid_shoulder[0], mid_shoulder[1]),
            _to_point(mid_hip[0], mid_hip[1]),
            color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)

    # Draw the keypoints.
    for line_idx, (i1, i2) in enumerate(kp_lines):
        p1 = _to_point(kps[0, i1], kps[1, i1])
        p2 = _to_point(kps[0, i2], kps[1, i2])
        if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
            cv2.line(
                kp_mask, p1, p2,
                color=colors[line_idx], thickness=2, lineType=cv2.LINE_AA)
        if kps[2, i1] > kp_thresh:
            cv2.circle(
                kp_mask, p1,
                radius=3, color=colors[line_idx], thickness=-1,
                lineType=cv2.LINE_AA)
        if kps[2, i2] > kp_thresh:
            cv2.circle(
                kp_mask, p2,
                radius=3, color=colors[line_idx], thickness=-1,
                lineType=cv2.LINE_AA)

    # Blend the keypoints.
    return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)


def vis_one_image_opencv(
        im, boxes, segms=None, keypoints=None, thresh=0.9, kp_thresh=2,
        show_box=False, dataset=None, show_class=False):
    """Constructs a numpy array with the detections visualized.

    Args:
        im: image array to draw on.
        boxes: either an array whose rows are (x1, y1, x2, y2, score), or a
            per-class list of such arrays (converted with
            convert_from_cls_format).
        segms: optional encoded masks, decoded with pycocotools.
        keypoints: optional per-detection keypoint arrays.
        thresh: detections scoring below this value are not drawn.
        kp_thresh: threshold forwarded to vis_keypoints.
        show_box: draw bounding boxes.
        dataset: optional object providing class names for the labels.
        show_class: draw class labels. Labels need per-detection class
            indices, which are only available when `boxes` is given in the
            per-class list format; otherwise labels are skipped.

    Returns:
        `im` itself when no detection reaches `thresh`, otherwise the image
        after drawing.
    """
    # Class indices are only known for the per-class list format; keep the
    # name bound so show_class=True with an array input cannot hit a
    # NameError.
    classes = None
    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return im

    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)
        color_list = colormap()
        mask_color_id = 0

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)

    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (off by default)
        if show_box:
            im = vis_bbox(
                im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]))

        # show class (off by default)
        if show_class and classes is not None:
            class_str = get_class_string(classes[i], score, dataset)
            im = vis_class(im, (bbox[0], bbox[1] - 2), class_str)

        # show mask
        if segms is not None and len(segms) > i:
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1
            im = vis_mask(im, masks[..., i], color_mask)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            im = vis_keypoints(im, keypoints[i], kp_thresh)

    return im


def vis_one_image(
        im, im_name, output_dir, boxes, segms=None, keypoints=None, thresh=0.9,
        kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False,
        ext='pdf', out_when_no_box=False):
    """Visual debugging of detections.

    Renders the image with matplotlib, overlays boxes, class labels, masks
    and keypoints for every detection scoring at least `thresh`, and saves
    the figure as <basename(im_name)>.<ext> inside `output_dir` (created if
    missing).

    Args:
        im: image array to display.
        im_name: image name; its basename is used for the output file.
        output_dir: directory the figure is written to.
        boxes: either an array whose rows are (x1, y1, x2, y2, score), or a
            per-class list of such arrays (converted with
            convert_from_cls_format).
        segms: optional encoded masks, decoded with pycocotools.
        keypoints: optional per-detection keypoint arrays; rows 0 and 1 are
            used as x / y and row 2 is compared against `kp_thresh`.
        thresh: detections scoring below this value are skipped.
        kp_thresh: minimum keypoint score for a keypoint to be drawn.
        dpi: figure resolution.
        box_alpha: opacity of the box outline (0.0 makes boxes invisible).
        dataset: optional object providing class names for the labels.
        show_class: draw class labels. Labels need per-detection class
            indices, which are only available when `boxes` is given in the
            per-class list format; otherwise labels are skipped.
        ext: output file extension.
        out_when_no_box: still write the figure when nothing reaches
            `thresh`.

    Returns:
        None.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Class indices are only known for the per-class list format; keep the
    # name bound so show_class=True with an array input cannot hit a
    # NameError.
    classes = None
    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    nothing_to_show = (
        boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh)
    if nothing_to_show and not out_when_no_box:
        return

    dataset_keypoints, _ = keypoint_utils.get_keypoints()

    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)

    color_list = colormap(rgb=True) / 255

    kp_lines = kp_connections(dataset_keypoints)
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]

    fig = plt.figure(frameon=False)
    fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.axis('off')
    fig.add_axes(ax)
    ax.imshow(im)

    if boxes is None:
        sorted_inds = []  # avoid crash when 'boxes' is None
    else:
        # Display in largest to smallest order to reduce occlusion
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)

    mask_color_id = 0
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (invisible with the default box_alpha of 0.0)
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1],
                          fill=False, edgecolor='g',
                          linewidth=0.5, alpha=box_alpha))

        if show_class and classes is not None:
            ax.text(
                bbox[0], bbox[1] - 2,
                get_class_string(classes[i], score, dataset),
                fontsize=3,
                family='serif',
                bbox=dict(
                    facecolor='g', alpha=0.4, pad=0, edgecolor='none'),
                color='white')

        # show mask
        if segms is not None and len(segms) > i:
            base_color = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1

            # Lighten the color by mixing it with white. This builds a new
            # array instead of writing through the slice, so the palette
            # entry is not lightened again when the palette wraps around.
            w_ratio = .4
            color_mask = base_color * (1 - w_ratio) + w_ratio
            e = masks[:, :, i]

            contour = cv2.findContours(
                e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2]

            for c in contour:
                polygon = Polygon(
                    c.reshape((-1, 2)),
                    fill=True, facecolor=color_mask,
                    edgecolor='w', linewidth=1.2,
                    alpha=0.5)
                ax.add_patch(polygon)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            kps = keypoints[i]
            plt.autoscale(False)
            for l in range(len(kp_lines)):
                i1 = kp_lines[l][0]
                i2 = kp_lines[l][1]
                if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
                    x = [kps[0, i1], kps[0, i2]]
                    y = [kps[1, i1], kps[1, i2]]
                    line = plt.plot(x, y)
                    plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
                if kps[2, i1] > kp_thresh:
                    plt.plot(
                        kps[0, i1], kps[1, i1], '.', color=colors[l],
                        markersize=3.0, alpha=0.7)

                if kps[2, i2] > kp_thresh:
                    plt.plot(
                        kps[0, i2], kps[1, i2], '.', color=colors[l],
                        markersize=3.0, alpha=0.7)

            # add mid shoulder / mid hip for better visualization
            mid_shoulder = (
                kps[:2, dataset_keypoints.index('right_shoulder')] +
                kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
            sc_mid_shoulder = np.minimum(
                kps[2, dataset_keypoints.index('right_shoulder')],
                kps[2, dataset_keypoints.index('left_shoulder')])
            mid_hip = (
                kps[:2, dataset_keypoints.index('right_hip')] +
                kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
            sc_mid_hip = np.minimum(
                kps[2, dataset_keypoints.index('right_hip')],
                kps[2, dataset_keypoints.index('left_hip')])
            if (sc_mid_shoulder > kp_thresh and
                    kps[2, dataset_keypoints.index('nose')] > kp_thresh):
                x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
                y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
                line = plt.plot(x, y)
                plt.setp(
                    line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7)
            if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                x = [mid_shoulder[0], mid_hip[0]]
                y = [mid_shoulder[1], mid_hip[1]]
                line = plt.plot(x, y)
                plt.setp(
                    line, color=colors[len(kp_lines) + 1], linewidth=1.0,
                    alpha=0.7)

    output_name = os.path.basename(im_name) + '.' + ext
    fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi)
    plt.close('all')


================================================
FILE: docker/Dockerfile
================================================
# Builds an image with Detectron on top of a prebuilt Caffe2 image.

# Use Caffe2 image as parent image
FROM caffe2/caffe2:snapshot-py2-cuda9.0-cudnn7-ubuntu16.04

# Relocate the Caffe2 install to /usr/local/caffe2_build and record the new
# location in Caffe2_DIR
RUN mv /usr/local/caffe2 /usr/local/caffe2_build
ENV Caffe2_DIR /usr/local/caffe2_build

# Put the relocated Caffe2 directory on the Python path and its lib
# directory on the shared-library search path
ENV PYTHONPATH /usr/local/caffe2_build:${PYTHONPATH}
ENV LD_LIBRARY_PATH /usr/local/caffe2_build/lib:${LD_LIBRARY_PATH}

# Clone the Detectron repository
# (no branch or commit is checked out, so the image gets whatever the
# default branch contains at build time)
RUN git clone https://github.com/facebookresearch/detectron /detectron

# Install Python dependencies
RUN pip install -r /detectron/requirements.txt

# Install the COCO API
RUN git clone https://github.com/cocodataset/cocoapi.git /cocoapi
WORKDIR /cocoapi/PythonAPI
RUN make install

# Go to Detectron root
WORKDIR /detectron

# Set up Python modules
RUN make

# [Optional] Build custom ops
RUN make ops


================================================
FILE: projects/GN/README.md
================================================
# Group Normalization for Mask R-CNN

<div align="center">
  <img src="gn.jpg" width="700px" />
</div>

## Introduction

This file provides Mask R-CNN baseline results and models trained with [Group Normalization](https://arxiv.org/abs/1803.08494):

```
@article{GroupNorm2018,
  title={Group Normalization},
  author={Yuxin Wu and Kaiming He},
  journal={arXiv:1803.08494},
  year={2018}
}
```

**Note:** This code uses the GroupNorm op implemented in CUDA, included in the Caffe2 repo. At the time of writing, Caffe2 is being merged into PyTorch, and the GroupNorm op is located [here](https://github.com/pytorch/pytorch/blob/master/caffe2/operators/group_norm_op.cu). Make sure your Caffe2 is up to date.

## Pretrained Models with GN

These models are trained in Caffe2 on the standard ImageNet-1k dataset, using GroupNorm with 32 groups (G=32).

- [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl): ResNet-50 with GN, 24.0\% top-1 error (center-crop).
- [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl): ResNet-101 with GN, 22.6\% top-1 error (center-crop).

## Results

### Baselines with BN

<table><tbody>
<!-- START E2E MASK RCNN BN TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;case&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<tr>
<td align="left"><sup><sub>R-50-FPN, BN*</sub></sup></td>
<td align="left"><sup><sub>Mask R-CNN</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>8.6</sub></sup></td>
<td align="right"><sup><sub>0.897</sub></sup></td>
<td align="right"><sup><sub>44.9</sub></sup></td>
<td align="right"><sup><sub>0.099&nbsp;+&nbsp;0.018</sub></sup></td>
<td align="right"><sup><sub>38.6</sub></sup></td>
<td align="right"><sup><sub>34.5</sub></sup></td>
<td align="right"><sup><sub>35859007</sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN, BN*</sub></sup></td>
<td align="left"><sup><sub>Mask R-CNN</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>10.2</sub></sup></td>
<td align="right"><sup><sub>0.993</sub></sup></td>
<td align="right"><sup><sub>49.7</sub></sup></td>
<td align="right"><sup><sub>0.126&nbsp;+&nbsp;0.017</sub></sup></td>
<td align="right"><sup><sub>40.9</sub></sup></td>
<td align="right"><sup><sub>36.4</sub></sup></td>
<td align="right"><sup><sub>35861858</sub></sup></td>
</tr>
<!-- END E2E MASK RCNN BN TABLE -->
</tbody></table>

**Notes:**

- This table is copied from [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
- BN<sup>*</sup> means that BatchNorm (BN) is used for pre-training and is frozen and turned into a per-channel linear layer when fine-tuning. This is the default of Faster/Mask R-CNN and Detectron.

### Mask R-CNN with GN

#### Standard Mask R-CNN recipe
<table><tbody>
<!-- START E2E MASK RCNN GN TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;case&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-FPN, GN</sub></sup></td>
<td align="left"><sup><sub>Mask R-CNN</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>10.5</sub></sup></td>
<td align="right"><sup><sub>1.017</sub></sup></td>
<td align="right"><sup><sub>50.8</sub></sup></td>
<td align="right"><sup><sub>0.146&nbsp;+&nbsp;0.017</sub></sup></td>
<td align="right"><sup><sub>40.3</sub></sup></td>
<td align="right"><sup><sub>35.7</sub></sup></td>
<td align="right"><sup><sub>48616381</sub></sup></td>
<td align="left"><sup><sub>
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>
  &nbsp;|&nbsp;
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>
  &nbsp;|&nbsp;
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN, GN</sub></sup></td>
<td align="left"><sup><sub>Mask R-CNN</sub></sup></td>
<td align="left"><sup><sub>2x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.4</sub></sup></td>
<td align="right"><sup><sub>1.151</sub></sup></td>
<td align="right"><sup><sub>57.5</sub></sup></td>
<td align="right"><sup><sub>0.180&nbsp;+&nbsp;0.015</sub></sup></td>
<td align="right"><sup><sub>41.8</sub></sup></td>
<td align="right"><sup><sub>36.8</sub></sup></td>
<td align="right"><sup><sub>48616724</sub></sup></td>
<td align="left"><sup><sub>
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48616724/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn_0416.13_26_34.GLnri4GR/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>
  &nbsp;|&nbsp;
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48616724/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn_0416.13_26_34.GLnri4GR/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>
  &nbsp;|&nbsp;
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48616724/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn_0416.13_26_34.GLnri4GR/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<!-- END E2E MASK RCNN GN TABLE -->
</tbody></table>

**Notes:**
- GN is applied on: (i) ResNet layers inherited from pre-training, (ii) the FPN-specific layers, (iii) the RoI bbox head, and (iv) the RoI mask head.
- These GN models use a 4conv+1fc RoI box head. The BN<sup>*</sup> counterpart with this head performs similarly to the default 2fc head: using this codebase, R-50-FPN BN<sup>\*</sup> with 4conv+1fc has 38.8/34.4 box/mask AP.
- 2x is the default schedule (180k) in Detectron.

#### Longer training schedule
<table><tbody>
<!-- START E2E MASK RCNN GN 3X TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;case&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<th valign="bottom"><sup><sub>download<br/>links</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-FPN, GN</sub></sup></td>
<td align="left"><sup><sub>Mask R-CNN</sub></sup></td>
<td align="left"><sup><sub><b>3x</b></sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>10.5</sub></sup></td>
<td align="right"><sup><sub>1.033</sub></sup></td>
<td align="right"><sup><sub>77.4</sub></sup></td>
<td align="right"><sup><sub>0.145&nbsp;+&nbsp;0.015</sub></sup></td>
<td align="right"><sup><sub>40.8</sub></sup></td>
<td align="right"><sup><sub>36.1</sub></sup></td>
<td align="right"><sup><sub>48734751</sub></sup></td>
<td align="left"><sup><sub>
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48734751/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn_0417.09_54_59.nwCTtPVk/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>
  &nbsp;|&nbsp;
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48734751/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn_0417.09_54_59.nwCTtPVk/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>
  &nbsp;|&nbsp;
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48734751/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn_0417.09_54_59.nwCTtPVk/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN, GN</sub></sup></td>
<td align="left"><sup><sub>Mask R-CNN</sub></sup></td>
<td align="left"><sup><sub><b>3x</b></sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.4</sub></sup></td>
<td align="right"><sup><sub>1.171</sub></sup></td>
<td align="right"><sup><sub>87.9</sub></sup></td>
<td align="right"><sup><sub>0.180&nbsp;+&nbsp;0.014</sub></sup></td>
<td align="right"><sup><sub>42.3</sub></sup></td>
<td align="right"><sup><sub>37.2</sub></sup></td>
<td align="right"><sup><sub>48734779</sub></sup></td>
<td align="left"><sup><sub>
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48734779/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn_0417.09_55_23.HMtcR8wg/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl">model</a>
  &nbsp;|&nbsp;
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48734779/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn_0417.09_55_23.HMtcR8wg/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json">boxes</a>
  &nbsp;|&nbsp;
  <a href="https://dl.fbaipublicfiles.com/detectron/GN/48734779/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn_0417.09_55_23.HMtcR8wg/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json">masks</a></sub></sup></td>
</tr>
<!-- END E2E MASK RCNN GN 3X TABLE -->
</tbody></table>

**Notes:**
- 3x is a longer schedule (270k). GN can improve further when using the longer schedule, but its BN<sup>*</sup> counterpart remains similar (R-50-FPN BN<sup>\*</sup>: 38.9/34.3) with the longer schedule.
- These models are **without** any scale augmentation that can further [improve results](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#mask-r-cnn-with-bells--whistles).


### Explorations

#### Training Mask R-CNN from scratch

GN enables training Mask R-CNN *from scratch* without ImageNet pre-training, despite the small batch size.

<table><tbody>
<!-- START E2E MASK RCNN GN SCRATCH TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<th valign="bottom"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;case&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>
<th valign="bottom"><sup><sub>type</sub></sup></th>
<th valign="bottom"><sup><sub>lr<br/>schd</sub></sup></th>
<th valign="bottom"><sup><sub>im/<br/>gpu</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>
<th valign="bottom"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>
<th valign="bottom"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>
<th valign="bottom"><sup><sub>box<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>mask<br/>AP</sub></sup></th>
<th valign="bottom"><sup><sub>model id</sub></sup></th>
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub>R-50-FPN, GN, scratch</sub></sup></td>
<td align="left"><sup><sub>Mask R-CNN</sub></sup></td>
<td align="left"><sup><sub>3x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>10.8</sub></sup></td>
<td align="right"><sup><sub>1.087</sub></sup></td>
<td align="right"><sup><sub>81.5</sub></sup></td>
<td align="right"><sup><sub>0.140&nbsp;+&nbsp;0.019</sub></sup></td>
<td align="right"><sup><sub>39.5</sub></sup></td>
<td align="right"><sup><sub>35.2</sub></sup></td>
<td align="right"><sup><sub>56421872</sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub>R-101-FPN, GN, scratch</sub></sup></td>
<td align="left"><sup><sub>Mask R-CNN</sub></sup></td>
<td align="left"><sup><sub>3x</sub></sup></td>
<td align="right"><sup><sub>2</sub></sup></td>
<td align="right"><sup><sub>12.7</sub></sup></td>
<td align="right"><sup><sub>1.243</sub></sup></td>
<td align="right"><sup><sub>93.2</sub></sup></td>
<td align="right"><sup><sub>0.177&nbsp;+&nbsp;0.019</sub></sup></td>
<td align="right"><sup><sub>41.0</sub></sup></td>
<td align="right"><sup><sub>36.4</sub></sup></td>
<td align="right"><sup><sub>56421911</sub></sup></td>
</tr>
<!-- END E2E MASK RCNN GN SCRATCH TABLE -->
</tbody></table>

**Notes:**
- To reproduce these results, see the config yaml files starting with ```scratch ```.
- These are results using ```freeze_at=0```. See this [commit](https://github.com/facebookresearch/Detectron/commit/f8ffc87ca442d8f6bd2b9aad11029b5db56d7260) about the related issue.

&nbsp;

<table><tbody>
<!-- START E2E MASK RCNN GN SCRATCH TABLE -->
<!-- TABLE HEADER -->
<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->
<!-- TABLE BODY -->
<tr>
<td align="left"><sup><sub><s>R-50-FPN, GN, scratch</s></sub></sup></td>
<td align="left"><sup><sub><s>Mask R-CNN</s></sub></sup></td>
<td align="left"><sup><sub><s>3x</s></sub></sup></td>
<td align="right"><sup><sub><s>2</s></sub></sup></td>
<td align="right"><sup><sub><s>10.5</s></sub></sup></td>
<td align="right"><sup><sub><s>0.990</s></sub></sup></td>
<td align="right"><sup><sub><s>74.3</s></sub></sup></td>
<td align="right"><sup><sub><s>0.146&nbsp;+&nbsp;0.020</s></sub></sup></td>
<td align="right"><sup><sub><s>36.2</s></sub></sup></td>
<td align="right"><sup><sub><s>32.5</s></sub></sup></td>
<td align="right"><sup><sub><s>49025460</s></sub></sup></td>
</tr>
<tr>
<td align="left"><sup><sub><s>R-101-FPN, GN, scratch</s></sub></sup></td>
<td align="left"><sup><sub><s>Mask R-CNN</s></sub></sup></td>
<td align="left"><sup><sub><s>3x</s></sub></sup></td>
<td align="right"><sup><sub><s>2</s></sub></sup></td>
<td align="right"><sup><sub><s>12.4</s></sub></sup></td>
<td align="right"><sup><sub><s>1.124</s></sub></sup></td>
<td align="right"><sup><sub><s>84.3</s></sub></sup></td>
<td align="right"><sup><sub><s>0.180&nbsp;+&nbsp;0.019</s></sub></sup></td>
<td align="right"><sup><sub><s>37.5</s></sub></sup></td>
<td align="right"><sup><sub><s>33.3</s></sub></sup></td>
<td align="right"><sup><sub><s>49024951</s></sub></sup></td>
</tr>
<!-- END E2E MASK RCNN GN SCRATCH TABLE -->
</tbody></table>

**Notes:**
- These are early results that followed the default training using ```freeze_at=2```. This means the layers of conv1 and res2 were simply random weights in the case of training from scratch. See this [commit](https://github.com/facebookresearch/Detectron/commit/f8ffc87ca442d8f6bd2b9aad11029b5db56d7260) about the related issue.


================================================
FILE: requirements.txt
================================================
numpy>=1.13
pyyaml==3.12
matplotlib
opencv-python>=3.2
setuptools
Cython
mock
scipy
six
future
protobuf


================================================
FILE: setup.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from Cython.Build import cythonize
from setuptools import Extension
from setuptools import setup

import numpy as np

_NP_INCLUDE_DIRS = np.get_include()


# Extension modules
# Cython extensions compiled from the .pyx sources under detectron/utils.
# Both are built against the NumPy headers located via np.get_include().
ext_modules = [
    Extension(
        name='detectron.utils.cython_bbox',
        sources=[
            'detectron/utils/cython_bbox.pyx'
        ],
        # -Wno-cpp disables compiler warnings produced by #warning directives
        # (presumably the NumPy deprecated-API notice -- TODO confirm).
        extra_compile_args=[
            '-Wno-cpp'
        ],
        include_dirs=[
            _NP_INCLUDE_DIRS
        ]
    ),
    Extension(
        name='detectron.utils.cython_nms',
        sources=[
            'detectron/utils/cython_nms.pyx'
        ],
        extra_compile_args=[
            '-Wno-cpp'
        ],
        include_dirs=[
            _NP_INCLUDE_DIRS
        ]
    )
]

# Register the 'detectron' package and run the .pyx sources through Cython
# (cythonize) before handing the extensions to setuptools.
setup(
    name='Detectron',
    packages=['detectron'],
    ext_modules=cythonize(ext_modules)
)


================================================
FILE: tools/convert_cityscapes_to_coco.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import h5py
import json
import os
import imageio
import sys

import cityscapesscripts.evaluation.instances2dict_with_polygons as cs

import detectron.utils.segms as segms_util
import detectron.utils.boxes as bboxs_util


def parse_args():
    """Parse the command-line options of the dataset conversion script.

    When the script is invoked without any argument the usage text is printed
    and the process exits with status 1.

    Returns:
        argparse.Namespace with the attributes ``dataset``, ``outdir`` and
        ``datadir``; each is a string, or None when the option is omitted.
    """
    parser = argparse.ArgumentParser(description='Convert dataset')
    parser.add_argument(
        '--dataset',
        # These are the exact values the __main__ dispatcher compares against;
        # the previous help text advertised "cityscapes", which is rejected.
        help="cocostuff, cityscapes_instance_only", default=None, type=str)
    parser.add_argument(
        '--outdir', help="output dir for json files", default=None, type=str)
    parser.add_argument(
        '--datadir', help="data dir for annotations to be converted",
        default=None, type=str)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


def convert_coco_stuff_mat(data_dir, out_dir):
    """Convert to png and save json with path. This currently only contains
    the segmentation labels for objects+stuff in cocostuff - if we need to
    combine with other labels from original COCO that will be a TODO.

    For each of the 'train' and 'val' sets, reads the image names listed in
    ``<data_dir>/<set>.txt``, loads ``<data_dir>/annotations/<name>.mat``,
    saves the label map 'S' as ``<data_dir>/<name>.png`` and writes
    ``<out_dir>/coco_stuff_<set>.json`` holding the 'categories' and 'images'
    lists.

    Args:
        data_dir: directory containing the ``<set>.txt`` lists and the
            ``annotations`` sub-directory; the PNG files are written here too.
        out_dir: directory receiving the JSON files.
    """
    sets = ['train', 'val']
    categories = []
    json_name = 'coco_stuff_%s.json'
    ann_dict = {}
    for data_set in sets:
        file_list = os.path.join(data_dir, '%s.txt')
        images = []
        with open(file_list % data_set) as f:
            for img_id, img_name in enumerate(f):
                img_name = img_name.replace('coco', 'COCO').strip('\n')
                mat_file = os.path.join(
                    data_dir, 'annotations/%s.mat' % img_name)
                # Open the annotation file in a context manager so its handle
                # is released after every image instead of leaking one handle
                # per annotation.
                with h5py.File(mat_file, 'r') as data:
                    labelMap = data.get('S')
                    if not categories:
                        # The category names are read once, from the first
                        # annotation file, and shared by both sets.
                        labelNames = data.get('names')
                        for idx, n in enumerate(labelNames):
                            categories.append(
                                {"id": idx,
                                 "name": ''.join(
                                     chr(i) for i in data[n[0]])})
                        ann_dict['categories'] = categories
                    imageio.imsave(
                        os.path.join(data_dir, img_name + '.png'), labelMap)
                    # NOTE(review): width is taken from axis 0 and height from
                    # axis 1, presumably because the stored array is
                    # transposed relative to image layout -- confirm.
                    image = {
                        'width': labelMap.shape[0],
                        'height': labelMap.shape[1],
                        'file_name': img_name,
                        'seg_file_name': img_name,
                        'id': img_id,
                    }
                images.append(image)
        ann_dict['images'] = images
        print("Num images: %s" % len(images))
        # Text mode: json produces str, which cannot be written to a file
        # opened with 'wb' on Python 3.
        with open(os.path.join(out_dir, json_name % data_set), 'w') as outfile:
            json.dump(ann_dict, outfile)


# for Cityscapes
def getLabelID(self, instID):
    """Return the label id that corresponds to an instance id.

    Ids below 1000 are returned unchanged; any other id is divided by 1000
    and truncated to an int. ``self`` is accepted but never used.
    """
    return instID if instID < 1000 else int(instID / 1000)


def convert_cityscapes_instance_only(
        data_dir, out_dir):
    """Convert from cityscapes format to COCO instance seg format - polygons

    Walks every enabled annotation directory below ``data_dir``, and for each
    ``*_polygons.json`` file emits one COCO image record plus one annotation
    per instance of the classes listed in ``category_instancesonly``. One
    ``instancesonly_filtered_<set>.json`` file is written to ``out_dir`` per
    enabled set.

    Image, annotation and category ids keep counting across sets, so a
    category receives the same id in every output file.

    Args:
        data_dir: root directory that contains the annotation directories.
        out_dir: directory receiving the generated JSON files.
    """
    sets = [
        'gtFine_val',
        # 'gtFine_train',
        # 'gtFine_test',

        # 'gtCoarse_train',
        # 'gtCoarse_val',
        # 'gtCoarse_train_extra'
    ]
    ann_dirs = [
        'gtFine_trainvaltest/gtFine/val',
        # 'gtFine_trainvaltest/gtFine/train',
        # 'gtFine_trainvaltest/gtFine/test',

        # 'gtCoarse/train',
        # 'gtCoarse/train_extra',
        # 'gtCoarse/val'
    ]
    json_name = 'instancesonly_filtered_%s.json'
    ends_in = '%s_polygons.json'
    img_id = 0
    ann_id = 0
    cat_id = 1
    category_dict = {}

    category_instancesonly = [
        'person',
        'rider',
        'car',
        'truck',
        'bus',
        'train',
        'motorcycle',
        'bicycle',
    ]

    for data_set, ann_dir in zip(sets, ann_dirs):
        print('Starting %s' % data_set)
        ann_dict = {}
        images = []
        annotations = []
        ann_dir = os.path.join(data_dir, ann_dir)
        # Loop invariants: annotation kind (the part of the set name before
        # the first '_', e.g. 'gtFine') and the matching file-name suffix.
        ann_kind = data_set.split('_')[0]
        suffix = ends_in % ann_kind
        for root, _, files in os.walk(ann_dir):
            for filename in files:
                if not filename.endswith(suffix):
                    continue
                if len(images) % 50 == 0:
                    print("Processed %s images, %s annotations" % (
                        len(images), len(annotations)))
                # Context manager so the annotation file is closed promptly.
                with open(os.path.join(root, filename)) as ann_file:
                    json_ann = json.load(ann_file)
                image = {}
                image['id'] = img_id
                img_id += 1

                image['width'] = json_ann['imgWidth']
                image['height'] = json_ann['imgHeight']
                # Sibling file names share the prefix that precedes the
                # polygon suffix.
                stem = filename[:-len(suffix)]
                image['file_name'] = stem + 'leftImg8bit.png'
                image['seg_file_name'] = \
                    stem + '%s_instanceIds.png' % ann_kind
                images.append(image)

                fullname = os.path.join(root, image['seg_file_name'])
                objects = cs.instances2dict_with_polygons(
                    [fullname], verbose=False)[fullname]

                for object_cls in objects:
                    if object_cls not in category_instancesonly:
                        continue  # skip non-instance categories

                    for obj in objects[object_cls]:
                        if obj['contours'] == []:
                            print('Warning: empty contours.')
                            continue  # nothing to describe as a polygon

                        len_p = [len(p) for p in obj['contours']]
                        if min(len_p) <= 4:
                            print('Warning: invalid contours.')
                            continue  # at least one contour is too short

                        ann = {}
                        ann['id'] = ann_id
                        ann_id += 1
                        ann['image_id'] = image['id']
                        ann['segmentation'] = obj['contours']

                        # Category ids are handed out in order of first
                        # appearance.
                        if object_cls not in category_dict:
                            category_dict[object_cls] = cat_id
                            cat_id += 1
                        ann['category_id'] = category_dict[object_cls]
                        ann['iscrowd'] = 0
                        ann['area'] = obj['pixelCount']
                        ann['bbox'] = bboxs_util.xyxy_to_xywh(
                            segms_util.polys_to_boxes(
                                [ann['segmentation']])).tolist()[0]

                        annotations.append(ann)

        ann_dict['images'] = images
        categories = [{"id": category_dict[name], "name": name} for name in
                      category_dict]
        ann_dict['categories'] = categories
        ann_dict['annotations'] = annotations
        print("Num categories: %s" % len(categories))
        print("Num images: %s" % len(images))
        print("Num annotations: %s" % len(annotations))
        # Text mode: json produces str, which cannot be written to a file
        # opened with 'wb' on Python 3.
        with open(os.path.join(out_dir, json_name % data_set), 'w') as outfile:
            json.dump(ann_dict, outfile)


if __name__ == '__main__':
    args = parse_args()
    # Map every accepted --dataset value to the converter that handles it.
    converters = {
        "cityscapes_instance_only": convert_cityscapes_instance_only,
        "cocostuff": convert_coco_stuff_mat,
    }
    convert = converters.get(args.dataset)
    if convert is None:
        print("Dataset not supported: %s" % args.dataset)
    else:
        convert(args.datadir, args.outdir)


================================================
FILE: tools/convert_coco_model_to_cityscapes.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Convert a detection model trained for COCO into a model that can be fine-tuned
# on cityscapes
#
# cityscapes_to_coco

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import numpy as np
import os
import sys

import detectron.datasets.coco_to_cityscapes_id as cs
from detectron.utils.io import load_object
from detectron.utils.io import save_object

NUM_CS_CLS = 9
NUM_COCO_CLS = 81


def parse_args():
    """Build the argument parser of the model converter and parse sys.argv.

    Prints the usage text and exits with status 1 when no argument is given.

    Returns:
        argparse.Namespace with ``coco_model_file_name``, ``convert_func``
        (default 'cityscapes_to_coco') and ``out_file_name``.
    """
    parser = argparse.ArgumentParser(
        description='Convert a COCO pre-trained model for use with Cityscapes')

    # (flag, destination attribute, help text, default value)
    options = [
        ('--coco_model', 'coco_model_file_name',
         'Pretrained network weights file path', None),
        ('--convert_func', 'convert_func',
         'Blob conversion function', 'cityscapes_to_coco'),
        ('--output', 'out_file_name',
         'Output file path', None),
    ]
    for flag, dest, help_text, default in options:
        parser.add_argument(
            flag, dest=dest, help=help_text, default=default, type=str)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()


def convert_coco_blobs_to_cityscape_blobs(model_dict, convert_func=None):
    """Convert, in place, every per-class COCO blob of ``model_dict``.

    A blob is treated as per-class when its leading dimension equals
    NUM_COCO_CLS or 4 * NUM_COCO_CLS; such blobs are replaced by the result
    of convert_coco_blob_to_cityscapes_blob. All other blobs are left as is.

    Args:
        model_dict: dict whose 'blobs' entry maps blob names to arrays.
        convert_func: name of the class-id mapping function handed to
            convert_coco_blob_to_cityscapes_blob. When None (the default),
            the value is read from the module-level ``args`` that the
            __main__ section creates, which keeps existing one-argument
            callers working while letting importers pass the name directly.
    """
    blobs = model_dict['blobs']
    # Only values of existing keys are replaced, so the dict never changes
    # size while it is being iterated.
    for k, v in blobs.items():
        if v.shape[0] not in (NUM_COCO_CLS, 4 * NUM_COCO_CLS):
            continue
        print(
            'Converting COCO blob {} with shape {}'.
            format(k, v.shape)
        )
        # Resolved lazily so the global is only touched when a blob actually
        # needs converting, exactly as before.
        func_name = convert_func if convert_func is not None \
            else args.convert_func
        cs_blob = convert_coco_blob_to_cityscapes_blob(v, func_name)
        print(' -> converted shape {}'.format(cs_blob.shape))
        blobs[k] = cs_blob


def convert_coco_blob_to_cityscapes_blob(coco_blob, convert_func):
    """Build a blob with NUM_CS_CLS classes from one with NUM_COCO_CLS.

    Args:
        coco_blob: array whose leading dimension is NUM_COCO_CLS or
            4 * NUM_COCO_CLS, i.e. shaped (81, ...) or (81*4, ...).
        convert_func: name of a function in the ``cs`` module that is called
            with a target class index and returns the source class index to
            copy from, or a negative value when there is none.

    Returns:
        float32 array shaped [NUM_CS_CLS * factor] + trailing dims, where
        factor is 1 or 4. Classes without a mapping keep a Gaussian random
        initialization that matches the mean and std of ``coco_blob``.
    """
    num_rows = coco_blob.shape[0]
    tail_shape = list(coco_blob.shape[1:])
    leading_factor = int(num_rows / NUM_COCO_CLS)
    assert leading_factor in (1, 4)

    # View the blob as [num_classes, factor, ...] so that a whole class can
    # be copied with a single index.
    per_class = coco_blob.reshape([NUM_COCO_CLS, -1] + tail_shape)

    # Start from noise with the statistics of the existing parameters.
    mean = per_class.mean()
    std = per_class.std()
    noise = np.random.randn(NUM_CS_CLS, *per_class.shape[1:])
    cs_blob = (noise * std + mean).astype(np.float32)

    # Overwrite the noise wherever a class mapping exists.
    to_coco_cls_id = getattr(cs, convert_func)
    for cs_cls_id in range(NUM_CS_CLS):
        coco_cls_id = to_coco_cls_id(cs_cls_id)
        if coco_cls_id >= 0:  # negative means no mapping: keep random init
            cs_blob[cs_cls_id] = per_class[coco_cls_id]

    return cs_blob.reshape([NUM_CS_CLS * leading_factor] + tail_shape)


def remove_momentum(model_dict):
    """Delete, in place, every blob whose name ends with '_momentum'.

    Args:
        model_dict: dict whose 'blobs' entry maps blob names to values.
    """
    blobs = model_dict['blobs']
    # Iterate over a snapshot of the names: deleting entries while iterating
    # the live keys view raises RuntimeError on Python 3.
    for k in list(blobs):
        if k.endswith('_momentum'):
            del blobs[k]


def load_and_convert_coco_model(args):
    """Load the weights file named by ``args`` and convert its blobs.

    The object loaded from ``args.coco_model_file_name`` first has its
    '_momentum' blobs removed and then its per-class COCO blobs converted;
    both steps modify the loaded object in place.

    Returns:
        The loaded and converted model dict.
    """
    weights = load_object(args.coco_model_file_name)
    remove_momentum(weights)
    convert_coco_blobs_to_cityscape_blobs(weights)
    return weights


# Script entry point: convert the weights file given by --coco_model and save
# the result to the path given by --output.
if __name__ == '__main__':
    # NOTE: ``args`` must stay a module-level name;
    # convert_coco_blobs_to_cityscape_blobs reads it as a global.
    args = parse_args()
    print(args)
    assert os.path.exists(args.coco_model_file_name), \
        'Weights file does not exist'
    weights = load_and_convert_coco_model(args)

    save_object(weights, args.out_file_name)
    print('Wrote blobs to {}:'.format(args.out_file_name))
    # List the names of the blobs that were written.
    print(sorted(weights['blobs'].keys()))


================================================
FILE: tools/convert_pkl_to_pb.py
================================================
#!/usr/bin/env python3

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Script to convert the model (.yaml and .pkl) trained by train_net to a
standard Caffe2 model in pb format (model.pb and model_init.pb). The converted
model is good for production usage, as it could run independently and efficiently
on CPU, GPU and mobile without depending on the detectron codebase.

Please see Caffe2 tutorial (
https://caffe2.ai/docs/tutorial-loading-pre-trained-models.html) for loading
the converted model, and run_model_pb() for running the model for inference.
"""

from __future__ import absolute_import, division, print_function, unicode_literals

import argparse
import copy
import os
import pprint
import sys

import caffe2.python.utils as putils
import cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)
import detectron.core.test_engine as test_engine
import detectron.utils.blob as blob_utils
import detectron.utils.c2 as c2_utils
import detectron.utils.model_convert_utils as mutils
import detectron.utils.vis as vis_utils
import numpy as np
from caffe2.caffe2.fb.predictor import predictor_exporter, predictor_py_utils
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace
from caffe2.python.predictor_constants import predictor_constants
from detectron.core.config import (
    assert_and_infer_cfg,
    cfg,
    merge_cfg_from_file,
    merge_cfg_from_list,
)
from detectron.modeling import generate_anchors
from detectron.utils.logging import setup_logging
from detectron.utils.model_convert_utils import convert_op_in_proto, op_filter


# Presumably loads the contrib and Detectron custom operator libraries needed
# by the nets built below -- see detectron.utils.c2 to confirm.
c2_utils.import_contrib_ops()
c2_utils.import_detectron_ops()

# OpenCL may be enabled by default in OpenCV3; disable it because it's not
# thread safe and causes unwanted GPU memory allocations.
cv2.ocl.setUseOpenCL(False)

# Module-level logger shared by parse_args(), _export_to_logfiledb() and main().
logger = setup_logging(__name__)


def parse_args():
    """Parse and normalize the converter's command-line arguments.

    Returns:
        argparse.Namespace with ``cfg_file``, ``net_name``, ``out_dir``
        (converted to an absolute path), ``test_img``, ``fuse_af``,
        ``device``, ``net_execution_type``, ``use_nnpack``, ``logdb`` and
        ``opts`` (all remaining arguments).

    Exits:
        With status 1 after printing the help text when no arguments are
        given, and through ``parser.error`` when ``--out_dir`` is missing.
    """
    parser = argparse.ArgumentParser(
        description="Convert a trained network to pb format"
    )
    parser.add_argument(
        "--cfg", dest="cfg_file", help="optional config file", default=None, type=str
    )
    parser.add_argument(
        "--net_name",
        dest="net_name",
        help="optional name for the net",
        default="detectron",
        type=str,
    )
    parser.add_argument(
        "--out_dir", dest="out_dir", help="output dir", default=None, type=str
    )
    parser.add_argument(
        "--test_img",
        dest="test_img",
        help="optional test image, used to verify the model conversion",
        default=None,
        type=str,
    )
    parser.add_argument(
        "--fuse_af", dest="fuse_af", help="1 to fuse_af", default=1, type=int
    )
    parser.add_argument(
        "--device",
        dest="device",
        help="Device to run the model on",
        choices=["cpu", "gpu"],
        default="cpu",
        type=str,
    )
    parser.add_argument(
        "--net_execution_type",
        dest="net_execution_type",
        help="caffe2 net execution type",
        choices=["simple", "dag"],
        default="simple",
        type=str,
    )
    parser.add_argument(
        "--use_nnpack",
        dest="use_nnpack",
        help="Use nnpack for conv",
        default=1,
        type=int,
    )
    parser.add_argument(
        "--logdb",
        dest="logdb",
        help="output to logfiledb instead of pb files",
        default=0,
        type=int,
    )
    parser.add_argument(
        "opts",
        help="See detectron/core/config.py for all options",
        default=None,
        nargs=argparse.REMAINDER,
    )
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    ret = parser.parse_args()
    # --out_dir defaults to None, and os.path.abspath(None) fails with an
    # opaque TypeError; report the missing option as a usage error instead.
    if ret.out_dir is None:
        parser.error("--out_dir is required")
    ret.out_dir = os.path.abspath(ret.out_dir)
    # The nnpack (mobile) engine is not used for GPU models, so switch it off.
    if ret.device == "gpu" and ret.use_nnpack:
        logger.warning("Should not use mobile engine for gpu model.")
        ret.use_nnpack = 0

    return ret


def unscope_name(name):
    """Return the result of ``c2_utils.UnscopeName`` applied to ``name``."""
    unscoped = c2_utils.UnscopeName(name)
    return unscoped


def reset_names(names):
    """Overwrite each entry of ``names`` in place with ``unscope_name(entry)``."""
    for idx, scoped in enumerate(names):
        names[idx] = unscope_name(scoped)


def convert_collect_and_distribute(
    op,
    blobs,
    roi_canonical_scale,
    roi_canonical_level,
    roi_max_level,
    roi_min_level,
    rpn_max_level,
    rpn_min_level,
    rpn_post_nms_topN,
):
    """Build a CollectAndDistributeFpnRpnProposals operator from a Python op.

    The new operator reuses the inputs and outputs of ``op`` and carries the
    given ROI/RPN level settings as operator arguments. ``blobs`` is accepted
    but not used here.

    Returns:
        The operator created by ``core.CreateOperator``.
    """
    print(
        "Converting CollectAndDistributeFpnRpnProposals"
        " Python -> C++:\n{}".format(op)
    )
    expected_prefix = "CollectAndDistributeFpnRpnProposalsOp"
    assert op.name.startswith(
        expected_prefix
    ), "Not valid CollectAndDistributeFpnRpnProposalsOp"

    return core.CreateOperator(
        "CollectAndDistributeFpnRpnProposals",
        list(op.input),
        list(op.output),
        roi_canonical_scale=roi_canonical_scale,
        roi_canonical_level=roi_canonical_level,
        roi_max_level=roi_max_level,
        roi_min_level=roi_min_level,
        rpn_max_level=rpn_max_level,
        rpn_min_level=rpn_min_level,
        rpn_post_nms_topN=rpn_post_nms_topN,
    )


def convert_gen_proposals(
    op, blobs, rpn_pre_nms_topN, rpn_post_nms_topN, rpn_nms_thresh, rpn_min_size
):
    """Build a GenerateProposals operator from a Python GenerateProposalsOp.

    An anchor blob is generated and stored in ``blobs`` under the returned
    name, and that name is appended to the inputs of the new operator. When
    the first input name ends in a non-zero digit, the digit is taken as the
    level: the blob is named ``anchor<lvl>`` and a single anchor size is
    derived from the FPN config. Otherwise the blob is named ``anchor`` and
    ``cfg.RPN.SIZES`` is used.

    Returns:
        Tuple ``(operator, anchor_blob_name)``.
    """
    print("Converting GenerateProposals Python -> C++:\n{}".format(op))
    assert op.name.startswith("GenerateProposalsOp"), "Not valid GenerateProposalsOp"

    spatial_scale = mutils.get_op_arg_valf(op, "spatial_scale", None)
    assert spatial_scale is not None

    last_char = op.input[0][-1]
    lvl = int(last_char) if last_char.isdigit() else None

    # The truthiness test means a level of 0 is handled like "no level".
    if lvl:
        anchor_name = "anchor{}".format(lvl)
        anchor_sizes = (
            cfg.FPN.RPN_ANCHOR_START_SIZE * 2.0 ** (lvl - cfg.FPN.RPN_MIN_LEVEL),
        )
    else:
        anchor_name = "anchor"
        anchor_sizes = cfg.RPN.SIZES

    inputs = list(op.input) + [anchor_name]
    blobs[anchor_name] = get_anchors(spatial_scale, anchor_sizes)
    print("anchors {}".format(blobs[anchor_name]))

    proposals_op = core.CreateOperator(
        "GenerateProposals",
        inputs,
        list(op.output),
        spatial_scale=spatial_scale,
        pre_nms_topN=rpn_pre_nms_topN,
        post_nms_topN=rpn_post_nms_topN,
        nms_thresh=rpn_nms_thresh,
        min_size=rpn_min_size,
        correct_transform_coords=True,
    )
    return proposals_op, anchor_name


def get_anchors(spatial_scale, anchor_sizes):
    """Generate float32 anchors for the given scale and anchor sizes.

    The stride passed to ``generate_anchors`` is ``1 / spatial_scale``; the
    aspect ratios come from ``cfg.RPN.ASPECT_RATIOS``.
    """
    stride = 1.0 / spatial_scale
    raw_anchors = generate_anchors.generate_anchors(
        stride=stride,
        sizes=anchor_sizes,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS,
    )
    return raw_anchors.astype(np.float32)


def reset_blob_names(blobs):
    """Re-key ``blobs`` in place so every key is replaced by its unscoped name."""
    renamed = {}
    for scoped, value in blobs.items():
        renamed[unscope_name(scoped)] = value
    blobs.clear()
    blobs.update(renamed)


def convert_net(args, net, blobs):
    """Rewrite ``net`` and ``blobs`` in place for export.

    Runs a sequence of per-operator conversion passes over ``net`` (the
    caller passes a net proto), then unscopes the net's external input and
    output names and the keys of ``blobs``.

    Args:
        args: parsed command-line arguments; only ``args.device`` is read.
        net: net proto whose operators and external inputs/outputs are edited.
        blobs: dict of blobs; anchor blobs are added and keys are unscoped.
    """
    @op_filter()
    def convert_op_name(op):
        """Unscope the op's input/output names; drop device info for non-GPU."""
        if args.device != "gpu":
            # Keep only the DEPTHWISE_3x3 engine; clear any other engine.
            if op.engine != "DEPTHWISE_3x3":
                op.engine = ""
            op.device_option.CopyFrom(caffe2_pb2.DeviceOption())
        reset_names(op.input)
        reset_names(op.output)
        return [op]

    @op_filter(type="Python")
    def convert_python(op):
        """Replace a supported Python op with a non-Python operator.

        Raises:
            ValueError: if the Python op is not one of the two handled kinds.
        """
        if op.name.startswith("GenerateProposalsOp"):
            gen_proposals_op, ext_input = convert_gen_proposals(
                op,
                blobs,
                rpn_min_size=float(cfg.TEST.RPN_MIN_SIZE),
                rpn_post_nms_topN=cfg.TEST.RPN_POST_NMS_TOP_N,
                rpn_pre_nms_topN=cfg.TEST.RPN_PRE_NMS_TOP_N,
                rpn_nms_thresh=cfg.TEST.RPN_NMS_THRESH,
            )
            # The generated anchor blob becomes an extra external input.
            net.external_input.extend([ext_input])
            return [gen_proposals_op]
        elif op.name.startswith("CollectAndDistributeFpnRpnProposalsOp"):
            collect_dist_op = convert_collect_and_distribute(
                op,
                blobs,
                roi_canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,
                roi_canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,
                roi_max_level=cfg.FPN.ROI_MAX_LEVEL,
                roi_min_level=cfg.FPN.ROI_MIN_LEVEL,
                rpn_max_level=cfg.FPN.RPN_MAX_LEVEL,
                rpn_min_level=cfg.FPN.RPN_MIN_LEVEL,
                rpn_post_nms_topN=cfg.TEST.RPN_POST_NMS_TOP_N,
            )
            return [collect_dist_op]
        else:
            raise ValueError("Failed to convert Python op {}".format(op.name))

    # Only convert UpsampleNearest to ResizeNearest when converting to pb, so
    # that existing models are left unchanged. See:
    # https://github.com/facebookresearch/Detectron/pull/372#issuecomment-410248561
    @op_filter(type="UpsampleNearest")
    def convert_upsample_nearest(op):
        """Replace an UpsampleNearest op with an equivalent ResizeNearest op.

        Raises:
            KeyError: if the op has no "scale" argument.
        """
        for arg in op.arg:
            if arg.name == "scale":
                scale = arg.i
                break
        else:
            raise KeyError('No attribute "scale" in UpsampleNearest op')
        # The same scale is applied to both width and height.
        resize_nearest_op = core.CreateOperator(
            "ResizeNearest",
            list(op.input),
            list(op.output),
            name=op.name,
            width_scale=float(scale),
            height_scale=float(scale),
        )
        return resize_nearest_op

    @op_filter()
    def convert_rpn_rois(op):
        """Rename every "rois" input or output of the op to "rpn_rois"."""
        for j in range(len(op.input)):
            if op.input[j] == "rois":
                print(
                    "Converting op {} input name: rois -> rpn_rois:\n{}".format(
                        op.type, op
                    )
                )
                op.input[j] = "rpn_rois"
        for j in range(len(op.output)):
            if op.output[j] == "rois":
                print(
                    "Converting op {} output name: rois -> rpn_rois:\n{}".format(
                        op.type, op
                    )
                )
                op.output[j] = "rpn_rois"
        return [op]

    @op_filter(type_in=["StopGradient", "Alias"])
    def convert_remove_op(op):
        """Drop StopGradient and Alias ops by returning no replacement ops."""
        print("Removing op {}:\n{}".format(op.type, op))
        return []

    # We want to apply to all operators, including converted
    # so run separately
    convert_op_in_proto(net, convert_remove_op)
    convert_op_in_proto(net, convert_upsample_nearest)
    convert_op_in_proto(net, convert_python)
    convert_op_in_proto(net, convert_op_name)
    convert_op_in_proto(net, convert_rpn_rois)

    reset_names(net.external_input)
    reset_names(net.external_output)

    reset_blob_names(blobs)


def add_bbox_ops(args, net, blobs):
    """Append box decoding and NMS operators to ``net``.

    A BBoxTransform op produces ``pred_bbox`` from ``rpn_rois``, ``bbox_pred``
    and ``im_info``; a BoxWithNMSLimit op then produces ``score_nms``,
    ``bbox_nms`` and ``class_nms``, which are registered as external outputs.
    ``args`` and ``blobs`` are accepted but not used here.
    """
    bbox_transform_op = core.CreateOperator(
        "BBoxTransform",
        ["rpn_rois", "bbox_pred", "im_info"],
        ["pred_bbox"],
        weights=cfg.MODEL.BBOX_REG_WEIGHTS,
        apply_scale=False,
        correct_transform_coords=True,
    )

    nms_outputs = ["score_nms", "bbox_nms", "class_nms"]
    # (argument name, value) pairs for the NMS operator, taken from cfg.TEST.
    nms_settings = [
        ("score_thresh", cfg.TEST.SCORE_THRESH),
        ("nms", cfg.TEST.NMS),
        ("detections_per_im", cfg.TEST.DETECTIONS_PER_IM),
        ("soft_nms_enabled", cfg.TEST.SOFT_NMS.ENABLED),
        ("soft_nms_method", cfg.TEST.SOFT_NMS.METHOD),
        ("soft_nms_sigma", cfg.TEST.SOFT_NMS.SIGMA),
    ]
    nms_op = core.CreateOperator(
        "BoxWithNMSLimit",
        ["cls_prob", "pred_bbox"],
        list(nms_outputs),
        arg=[putils.MakeArgument(key, value) for key, value in nms_settings],
    )

    proto = net.Proto()
    proto.op.extend([bbox_transform_op, nms_op])
    proto.external_output.extend(nms_outputs)


def convert_model_gpu(args, net, init_net):
    """Return GPU variants of ``net`` and ``init_net``.

    Works on deep copies. Ops in the predict net get the CUDA device option,
    except for a fixed set of op types that are left untouched. Init ops whose
    first output is ``im_info`` or ``anchor`` get the CPU device option and
    all other init ops get CUDA. Device copies are then injected across both
    nets.

    Returns:
        A two-element list taken from the nets returned by
        ``core.InjectDeviceCopiesAmongNets`` -- the entry for the predict net
        first, then the one for the init net.
    """
    assert args.device == "gpu"

    gpu_net = copy.deepcopy(net)
    gpu_init_net = copy.deepcopy(init_net)

    cuda_option = mutils.get_device_option_cuda()
    cpu_option = mutils.get_device_option_cpu()

    # (op type, inputs filter) pairs for operators that keep their device.
    cpu_only_ops = (
        ("CollectAndDistributeFpnRpnProposals", None),
        ("GenerateProposals", None),
        ("BBoxTransform", None),
        ("BoxWithNMSLimit", None),
    )
    cpu_only_blobs = ("im_info", "anchor")

    @op_filter()
    def convert_op_gpu(op):
        # Returning None leaves a CPU-only op as it is.
        if any(
            mutils.filter_op(op, type=op_type, inputs=op_inputs)
            for op_type, op_inputs in cpu_only_ops
        ):
            return None
        op.device_option.CopyFrom(cuda_option)
        return [op]

    @op_filter()
    def convert_init_op_gpu(op):
        on_cpu = op.output[0] in cpu_only_blobs
        op.device_option.CopyFrom(cpu_option if on_cpu else cuda_option)
        return [op]

    convert_op_in_proto(gpu_init_net.Proto(), convert_init_op_gpu)
    convert_op_in_proto(gpu_net.Proto(), convert_op_gpu)

    injected = core.InjectDeviceCopiesAmongNets([gpu_init_net, gpu_net])
    new_nets = injected[0]

    return [new_nets[1], new_nets[0]]


def gen_init_net(net, blobs, empty_blobs):
    """Create the init net that fills the blobs required by ``net``.

    ``blobs`` is deep-copied so the caller's dict is untouched; every name in
    ``empty_blobs`` is added to the copy as an empty float32 array.

    Returns:
        A ``core.Net`` wrapping the result of ``gen_init_net_from_blobs``.
    """
    init_blobs = copy.deepcopy(blobs)
    init_blobs.update(
        (name, np.array([], dtype=np.float32)) for name in empty_blobs
    )
    init_proto = mutils.gen_init_net_from_blobs(init_blobs, net.external_inputs)
    return core.Net(init_proto)


def _save_image_graphs(args, all_net, all_init_net):
    """Save a picture of the predict net as ``model_def.png`` in the output dir.

    ``all_init_net`` is accepted but not drawn.
    """
    print("Saving model graph...")
    graph_path = os.path.join(args.out_dir, "model_def.png")
    mutils.save_graph(all_net.Proto(), graph_path, op_only=False)
    print("Model def image saved to {}.".format(args.out_dir))


def _save_models(all_net, all_init_net, args):
    """Write the converted nets to ``args.out_dir``.

    Creates the directory if needed and writes three files:
    ``model.pb`` (serialized predict net), ``model.pbtxt`` (its text form)
    and ``model_init.pb`` (serialized init net). Finally saves the graph
    image through ``_save_image_graphs``.
    """
    print("Writing converted model to {}...".format(args.out_dir))
    fname = "model"

    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)

    with open(os.path.join(args.out_dir, fname + ".pb"), "wb") as f:
        f.write(all_net.Proto().SerializeToString())
    # str(proto) is text, so this file must be opened in text mode; writing a
    # str to a file opened with "wb" raises TypeError on Python 3.
    with open(os.path.join(args.out_dir, fname + ".pbtxt"), "w") as f:
        f.write(str(all_net.Proto()))
    with open(os.path.join(args.out_dir, fname + "_init.pb"), "wb") as f:
        f.write(all_init_net.Proto().SerializeToString())

    _save_image_graphs(args, all_net, all_init_net)


def load_model(args):
    """Initialize the model from ``cfg.TEST.WEIGHTS`` and fetch workspace blobs.

    ``args`` is accepted but not used here.

    Returns:
        Tuple ``(model, blobs)`` where ``blobs`` comes from
        ``mutils.get_ws_blobs()``, called after the model is initialized.
    """
    model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS)
    return model, mutils.get_ws_blobs()


def _get_result_blobs(check_blobs):
    """Map each name in ``check_blobs`` to its workspace value.

    Names are looked up under the current scope; a name with no blob in the
    workspace maps to ``None``.
    """
    def _fetch_or_none(name):
        scoped = core.ScopedName(name)
        return workspace.FetchBlob(scoped) if workspace.HasBlob(scoped) else None

    return {name: _fetch_or_none(name) for name in check_blobs}


def _sort_results(boxes, segms, keypoints, classes):
    indices = np.argsort(boxes[:, -1])[::-1]
    if boxes is not None:
        boxes = boxes[indices, :]
    if segms is not None:
        segms = [segms[x] for x in indices]
    if keypoints is not None:
        keypoints = [keypoints[x] for x in indices]
    if classes is not None:
        if isinstance(classes, list):
            classes = [classes[x] for x in indices]
        else:
            classes = classes[indices]

    return boxes, segms, keypoints, classes


def run_model_cfg(args, im, check_blobs):
    """Run the original (config-based) model on ``im`` and collect results.

    The workspace is reset and the model reloaded. Detections are sorted by
    score and fed back into the workspace as ``result_boxes``,
    ``result_segms``, ``result_keypoints`` and ``result_classids``.

    Returns:
        Dict mapping each name in ``check_blobs`` to its workspace value.
    """
    workspace.ResetWorkspace()
    model, _ = load_model(args)
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = test_engine.im_detect_all(
            model, im, None, None
        )

    # Flatten the per-class results, then sort by score so the output can be
    # compared with the converted model's output.
    boxes, segms, keypoints, classes = _sort_results(
        *vis_utils.convert_from_cls_format(cls_boxes, cls_segms, cls_keyps)
    )

    def _as_array(res):
        # Missing results are stored as an empty float32 array.
        if res is None:
            return np.array([], dtype=np.float32)
        return np.array(res)

    results = (
        ("result_boxes", boxes),
        ("result_segms", segms),
        ("result_keypoints", keypoints),
        ("result_classids", classes),
    )
    with c2_utils.NamedCudaScope(0):
        for blob_name, value in results:
            workspace.FeedBlob(core.ScopedName(blob_name), _as_array(value))

    with c2_utils.NamedCudaScope(0):
        fetched = _get_result_blobs(check_blobs)

    return fetched


def _prepare_blobs(im, pixel_means, target_size, max_size):
    """Build the ``data`` and ``im_info`` input blobs for one image.

    Reference: blob.prep_im_for_blob(). The image is converted to float32,
    mean-subtracted and rescaled so its shorter side equals ``target_size``,
    unless that would push the longer side past ``max_size``, in which case
    the longer side is scaled to ``max_size`` instead.

    Returns:
        Dict with ``data`` (the image blob) and ``im_info`` (one float32 row
        holding blob height, blob width and the applied scale).
    """
    im = im.astype(np.float32, copy=False)
    im -= pixel_means

    height, width = im.shape[0], im.shape[1]
    short_side = min(height, width)
    long_side = max(height, width)

    im_scale = float(target_size) / float(short_side)
    exceeds_max = np.round(im_scale * long_side) > max_size
    if exceeds_max:
        im_scale = float(max_size) / float(long_side)
    im = cv2.resize(
        im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR
    )

    # Reuse the blob_utils helper so the blob layout fits FPN models.
    blob = blob_utils.im_list_to_blob([im])
    im_info = np.array(
        [[blob.shape[2], blob.shape[3], im_scale]], dtype=np.float32
    )
    return {"data": blob, "im_info": im_info}


def run_model_pb(args, net, init_net, im, check_blobs):
    """Run the converted pb model on ``im`` and collect results.

    The workspace is reset, the init net is run once, and the input blobs are
    fed (``data`` goes to the GPU when ``args.device`` is "gpu"). If running
    the net or fetching its outputs fails, the error is printed and empty
    results are used. Sorted results are stored as ``result_boxes`` and
    ``result_classids``.

    Returns:
        Dict mapping each name in ``check_blobs`` to its workspace value.
    """
    workspace.ResetWorkspace()
    workspace.RunNetOnce(init_net)
    mutils.create_input_blobs_for_net(net.Proto())
    workspace.CreateNet(net)

    # Inputs are prepared locally instead of through the test engine.
    input_blobs = _prepare_blobs(im, cfg.PIXEL_MEANS, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    gpu_blobs = ["data"] if args.device == "gpu" else []
    for blob_name, value in input_blobs.items():
        if blob_name in gpu_blobs:
            device_option = mutils.get_device_option_cuda()
        else:
            device_option = mutils.get_device_option_cpu()
        workspace.FeedBlob(core.ScopedName(blob_name), value, device_option)

    try:
        workspace.RunNet(net)
        scores = workspace.FetchBlob("score_nms")
        classids = workspace.FetchBlob("class_nms")
        boxes = workspace.FetchBlob("bbox_nms")
    except Exception as e:
        print("Running pb model failed.\n{}".format(e))
        # The model may not detect anything at all; fall back to empty results.
        num_dets = 0
        scores = np.zeros((num_dets,), dtype=np.float32)
        boxes = np.zeros((num_dets, 4), dtype=np.float32)
        classids = np.zeros((num_dets,), dtype=np.float32)

    # Append the score as the last column, as _sort_results expects.
    boxes = np.column_stack((boxes, scores))

    # Sort by score so the output can be compared with the cfg model's output.
    boxes, _, _, classids = _sort_results(boxes, None, None, classids)

    # Write the final results back to the workspace.
    workspace.FeedBlob("result_boxes", boxes)
    workspace.FeedBlob("result_classids", classids)

    return _get_result_blobs(check_blobs)


def verify_model(args, model_pb, test_img_file):
    """Check the converted model against the original on one test image.

    ``model_pb`` holds the converted predict net at index 0 and the init net
    at index 1. Both models are run on the image and ``mutils.compare_model``
    compares the ``result_boxes`` and ``result_classids`` blobs; the function
    asserts on the comparison result and on the image being readable.
    """
    check_blobs = ["result_boxes", "result_classids"]

    print("Loading test file {}...".format(test_img_file))
    test_img = cv2.imread(test_img_file)
    assert test_img is not None

    def _run_original(im, blobs):
        return run_model_cfg(args, im, check_blobs)

    def _run_converted(im, blobs):
        return run_model_pb(args, model_pb[0], model_pb[1], im, check_blobs)

    print("Checking models...")
    models_match = mutils.compare_model(
        _run_original, _run_converted, test_img, check_blobs
    )
    assert models_match


def _export_to_logfiledb(args, net, init_net, inputs, out_file, extra_out_tensors=None):
    """Export ``net`` to ``out_file`` as a ``log_file_db`` predictor database.

    Outputs are the net's external outputs plus ``extra_out_tensors`` when
    given; parameters are the external inputs that are not listed in
    ``inputs``. ``args`` and ``init_net`` are accepted but not used here.
    """
    proto = net.Proto()
    out_tensors = list(proto.external_output)
    if extra_out_tensors is not None:
        out_tensors.extend(extra_out_tensors)
    params = list(set(proto.external_input) - set(inputs))

    export_meta = predictor_exporter.PredictorExportMeta(
        predict_net=net,
        parameters=params,
        inputs=inputs,
        outputs=out_tensors,
        net_type=None,
    )

    logger.info("Exporting Caffe2 model to {}".format(out_file))
    predictor_exporter.save_to_db(
        db_type="log_file_db",
        db_destination=out_file,
        predictor_export_meta=export_meta,
    )


def main():
    """Convert the configured model and write it as pb files or a logfiledb.

    Steps: parse arguments and merge the config, load the model, copy its
    ops and external inputs/outputs into a fresh net, convert that net, add
    the bbox ops, optionally fuse affine channels and switch to mobile
    engines, generate the init net, optionally move to GPU, optionally verify
    against a test image, then export.
    """
    workspace.GlobalInit(["caffe2", "--caffe2_log_level=0"])
    args = parse_args()
    logger.info("Called with args:")
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    logger.info("Converting model with config:")
    logger.info(pprint.pformat(cfg))

    # Model kinds (keypoints, masks, FPN, RetinaNet) are not rejected up
    # front based on config flags; the script stops when it meets an operator
    # it cannot find.

    # Load the model from cfg.
    model, blobs = load_model(args)

    # Copy the trained net's ops and external blobs into a fresh, unnamed net.
    src_proto = model.net.Proto()
    net = core.Net("")
    dst_proto = net.Proto()
    dst_proto.op.extend(copy.deepcopy(src_proto.op))
    dst_proto.external_input.extend(copy.deepcopy(src_proto.external_input))
    dst_proto.external_output.extend(copy.deepcopy(src_proto.external_output))
    dst_proto.type = args.net_execution_type
    if args.net_execution_type == "simple":
        dst_proto.num_workers = 1
    else:
        dst_proto.num_workers = 4

    # Reset the device_option, unscope names and replace Python operators.
    convert_net(args, dst_proto, blobs)

    # Add the operators for bbox decoding and NMS.
    add_bbox_ops(args, net, blobs)

    if args.fuse_af:
        print("Fusing affine channel...")
        net, blobs = mutils.fuse_net_affine(net, blobs)

    if args.use_nnpack:
        mutils.update_mobile_engines(net.Proto())

    # Generate the init net; the model inputs start out as empty blobs.
    empty_blobs = ["data", "im_info"]
    init_net = gen_init_net(net, blobs, empty_blobs)

    if args.device == "gpu":
        net, init_net = convert_model_gpu(args, net, init_net)

    net.Proto().name = args.net_name
    init_net.Proto().name = args.net_name + "_init"

    if args.test_img is not None:
        verify_model(args, [net, init_net], args.test_img)

    if args.logdb == 1:
        output_file = os.path.join(args.out_dir, "model.logfiledb")
        _export_to_logfiledb(args, net, init_net, empty_blobs, output_file)
    else:
        _save_models(net, init_net, args)

# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: tools/convert_selective_search.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Script to convert Selective Search proposal boxes into the Detectron proposal
file format.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import scipy.io as sio
import sys

from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import save_object


if __name__ == '__main__':
    # Positional arguments: dataset name, input .mat file, output file.
    dataset_name, file_in, file_out = sys.argv[1], sys.argv[2], sys.argv[3]

    roidb = JsonDataset(dataset_name).get_roidb()
    raw_data = sio.loadmat(file_in)['boxes'].ravel()
    num_images = raw_data.shape[0]
    # There must be one set of proposals per roidb entry.
    assert num_images == len(roidb)

    boxes, scores, ids = [], [], []
    for idx, raw_boxes in enumerate(raw_data):
        if idx % 1000 == 0:
            print('{}/{}'.format(idx + 1, len(roidb)))
        # selective search boxes are 1-indexed and (y1, x1, y2, x2);
        # reorder the columns and shift to 0-indexed coordinates.
        image_boxes = raw_boxes[:, (1, 0, 3, 2)] - 1
        boxes.append(image_boxes.astype(np.float32))
        # Every proposal gets a score of zero.
        scores.append(np.zeros(image_boxes.shape[0], dtype=np.float32))
        ids.append(roidb[idx]['id'])

    proposals = {'boxes': boxes, 'scores': scores, 'indexes': ids}
    save_object(proposals, file_out)


================================================
FILE: tools/generate_testdev_from_test.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Given a full set of results (boxes, masks, or keypoints) on the 2017 COCO
test set, this script extracts the results subset that corresponds to 2017
test-dev. The test-dev subset can then be submitted to the COCO evaluation
server.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import json
import os
import sys

from detectron.datasets.dataset_catalog import get_ann_fn
from detectron.utils.timer import Timer


def parse_args():
    """Parse the command line of the test-dev extraction script.

    Returns:
        argparse.Namespace with ``json_file`` (default ``''``) and
        ``output_dir`` (default ``'/tmp'``).

    Prints the help text and exits with status 1 when no arguments are given.
    """
    parser = argparse.ArgumentParser()
    option_specs = (
        ('--json', 'json_file', 'detections json file', ''),
        ('--output-dir', 'output_dir', 'output directory', '/tmp'),
    )
    for flag, dest, help_text, default in option_specs:
        parser.add_argument(
            flag, dest=dest, help=help_text, default=default, type=str
        )
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


def convert(json_file, output_dir):
    """Extract the test-dev subset from a full COCO 2017 test results file.

    Args:
        json_file: path to a JSON list of detections, each with an
            ``image_id`` key.
        output_dir: directory for the filtered file, which is named after
            ``json_file`` with ``_test-dev`` inserted before the extension.
    """
    print('Reading: {}'.format(json_file))
    with open(json_file, 'r') as fid:
        dt = json.load(fid)
    print('done!')

    # The full test set is only loaded to report its size.
    test_image_info = get_ann_fn('coco_2017_test')
    with open(test_image_info, 'r') as fid:
        info_test = json.load(fid)
    image_test_id = [img['id'] for img in info_test['images']]
    print('{} has {} images'.format(test_image_info, len(image_test_id)))

    test_dev_image_info = get_ann_fn('coco_2017_test-dev')
    with open(test_dev_image_info, 'r') as fid:
        info_testdev = json.load(fid)
    image_testdev_id = [img['id'] for img in info_testdev['images']]
    print('{} has {} images'.format(test_dev_image_info, len(image_testdev_id)))

    # A set makes each membership test O(1); scanning the id list once per
    # detection would make the filtering quadratic.
    testdev_ids = set(image_testdev_id)

    dt_testdev = []
    print('Filtering test-dev from test...')
    t = Timer()
    t.tic()
    for i, det in enumerate(dt):
        if i % 1000 == 0:
            print('{}/{}'.format(i, len(dt)))
        if det['image_id'] in testdev_ids:
            dt_testdev.append(det)
    print('Done filtering ({:.2f}s)!'.format(t.toc()))

    filename, file_extension = os.path.splitext(os.path.basename(json_file))
    filename = filename + '_test-dev'
    filename = os.path.join(output_dir, filename + file_extension)
    with open(filename, 'w') as fid:
        json.dump(dt_testdev, fid)
    print('Done writing: {}!'.format(filename))


# Script entry point: filter the detections file named by --json and write the
# test-dev subset into the --output-dir directory.
if __name__ == '__main__':
    opts = parse_args()
    convert(opts.json_file, opts.output_dir)


================================================
FILE: tools/infer.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Perform inference on a single image or all images with a certain extension
(e.g., .jpg) in a folder. Allows for using a combination of multiple models.
For example, one model may be used for RPN, another model for Fast R-CNN style
box detection, yet another model to predict masks, and yet another model to
predict keypoints.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)
import logging
import os
import sys

from caffe2.python import workspace

from detectron.core.config import assert_and_infer_cfg
from detectron.core.config import cfg
from detectron.core.config import load_cfg
from detectron.core.config import merge_cfg_from_cfg
from detectron.core.config import merge_cfg_from_file
from detectron.utils.io import cache_url
from detectron.utils.logging import setup_logging
import detectron.core.rpn_generator as rpn_engine
import detectron.core.test_engine as model_engine
import detectron.datasets.dummy_datasets as dummy_datasets
import detectron.utils.c2 as c2_utils
import detectron.utils.env as envu
import detectron.utils.vis as vis_utils

# Presumably loads the Detectron custom operator library needed by the models
# run below -- see detectron.utils.c2 to confirm.
c2_utils.import_detectron_ops()

# OpenCL may be enabled by default in OpenCV3; disable it because it's not
# thread safe and causes unwanted GPU memory allocations.
cv2.ocl.setUseOpenCL(False)

# infer.py
#   --im [path/to/image.jpg] \
#   --rpn-pkl [path/to/rpn/model.pkl] \
#   --rpn-cfg [path/to/rpn/config.yaml] \
#   --output-dir [path/to/output/dir] \
#   [model1] [config1] [model2] [config2] ...


def parse_args():
    """Parse the command line of the multi-model inference script.

    Returns:
        argparse.Namespace with ``im_file``, ``rpn_pkl``, ``rpn_cfg``,
        ``output_dir`` (default ``'/tmp/infer'``) and ``models_to_run`` (all
        remaining arguments, expected as alternating pkl/yaml entries).

    Prints the help text and exits with status 1 when no arguments are given.
    """
    parser = argparse.ArgumentParser(description='Inference on an image')
    # (flag, dest, help, default) for every string-valued option.
    string_options = (
        ('--im', 'im_file', 'input image', None),
        ('--rpn-pkl', 'rpn_pkl', 'rpn model file (pkl)', None),
        ('--rpn-cfg', 'rpn_cfg', 'cfg model file (yaml)', None),
        (
            '--output-dir',
            'output_dir',
            'directory for visualization pdfs (default: /tmp/infer)',
            '/tmp/infer',
        ),
    )
    for flag, dest, help_text, default in string_options:
        parser.add_argument(
            flag, dest=dest, help=help_text, default=default, type=str
        )
    parser.add_argument(
        'models_to_run',
        help='pairs of models & configs, listed like so: [pkl1] [yaml1] [pkl2] [yaml2] ...',
        default=None,
        nargs=argparse.REMAINDER
    )
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    parsed = parser.parse_args()
    return parsed


def get_rpn_box_proposals(im, args):
    """Compute RPN proposals for ``im`` with the RPN model named in ``args``.

    The global config is unlocked, merged with ``args.rpn_cfg`` and switched
    to RPN-only mode on a single GPU with fixed pre-/post-NMS proposal counts
    before the model from ``args.rpn_pkl`` is initialized.

    Returns:
        Tuple ``(boxes, scores)`` from ``rpn_engine.im_proposals``.
    """
    cfg.immutable(False)
    merge_cfg_from_file(args.rpn_cfg)
    cfg.NUM_GPUS = 1
    cfg.MODEL.RPN_ONLY = True
    test_cfg = cfg.TEST
    test_cfg.RPN_PRE_NMS_TOP_N = 10000
    test_cfg.RPN_POST_NMS_TOP_N = 2000
    assert_and_infer_cfg(cache_urls=False)

    rpn_model = model_engine.initialize_model_from_cfg(args.rpn_pkl)
    with c2_utils.NamedCudaScope(0):
        proposal_boxes, proposal_scores = rpn_engine.im_proposals(rpn_model, im)
    return proposal_boxes, proposal_scores


def main(args):
    """Run every requested model on the input image and save a visualization.

    Proposals are computed first when an RPN model is given. Each pkl/yaml
    pair in ``args.models_to_run`` is then run in turn from a fresh copy of
    the original config; for boxes, masks and keypoints the last result that
    is not ``None`` is kept. The combined results are drawn with
    ``vis_utils.vis_one_image``.
    """
    logger = logging.getLogger(__name__)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    # Snapshot of the config taken before any per-model merging.
    cfg_orig = load_cfg(envu.yaml_dump(cfg))
    im = cv2.imread(args.im_file)

    proposal_boxes = None
    if args.rpn_pkl is not None:
        proposal_boxes, _proposal_scores = get_rpn_box_proposals(im, args)
        workspace.ResetWorkspace()

    cls_boxes = cls_segms = cls_keyps = None
    model_specs = args.models_to_run
    for idx in range(0, len(model_specs), 2):
        pkl, yml = model_specs[idx], model_specs[idx + 1]
        # Restore the original config before applying this model's config.
        cfg.immutable(False)
        merge_cfg_from_cfg(cfg_orig)
        merge_cfg_from_file(yml)
        # An empty pkl entry means "use the weights named in the config".
        weights_file = pkl if len(pkl) > 0 else cfg.TEST.WEIGHTS
        cfg.NUM_GPUS = 1
        assert_and_infer_cfg(cache_urls=False)
        model = model_engine.initialize_model_from_cfg(weights_file)
        with c2_utils.NamedCudaScope(0):
            new_boxes, new_segms, new_keyps = model_engine.im_detect_all(
                model, im, proposal_boxes
            )
        if new_boxes is not None:
            cls_boxes = new_boxes
        if new_segms is not None:
            cls_segms = new_segms
        if new_keyps is not None:
            cls_keyps = new_keyps
        workspace.ResetWorkspace()

    out_name = os.path.join(
        args.output_dir, os.path.basename(args.im_file) + '.pdf'
    )
    logger.info('Processing {} -> {}'.format(args.im_file, out_name))

    # The image is passed with its channel order reversed.
    vis_utils.vis_one_image(
        im[:, :, ::-1],
        args.im_file,
        args.output_dir,
        cls_boxes,
        cls_segms,
        cls_keyps,
        dataset=dummy_coco_dataset,
        box_alpha=0.3,
        show_class=True,
        thresh=0.7,
        kp_thresh=2
    )


def check_args(args):
    """Validate parsed arguments and resolve weight files through cache_url.

    ``args.rpn_pkl`` and ``args.rpn_cfg`` must be given together or not at
    all. Even-indexed entries of ``args.models_to_run`` (the weights) are
    replaced in place by the result of ``cache_url``; every non-empty entry
    must then exist on disk. Failures surface as AssertionError.
    """
    has_rpn_pkl = args.rpn_pkl is not None
    has_rpn_cfg = args.rpn_cfg is not None
    # Either both RPN options are supplied or neither is.
    assert has_rpn_pkl == has_rpn_cfg

    if has_rpn_pkl:
        args.rpn_pkl = cache_url(args.rpn_pkl, cfg.DOWNLOAD_CACHE)
        assert os.path.exists(args.rpn_pkl)
        assert os.path.exists(args.rpn_cfg)

    if args.models_to_run is None:
        return

    assert len(args.models_to_run) % 2 == 0
    for idx, path in enumerate(args.models_to_run):
        if len(path) == 0:
            # Empty entries are placeholders and are not checked.
            continue
        if idx % 2 == 0:
            # Even positions hold weights files, which go through cache_url.
            path = cache_url(path, cfg.DOWNLOAD_CACHE)
            args.models_to_run[idx] = path
        assert os.path.exists(path), \
            '\'{}\' does not exist'.format(path)


if __name__ == '__main__':
    # Script entry point: initialize Caffe2, set up logging, then parse and
    # validate the command line before running inference.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    setup_logging(__name__)
    args = parse_args()
    # check_args may rewrite weight paths in args (via cache_url), so it must
    # run before main().
    check_args(args)
    main(args)


================================================
FILE: tools/infer_simple.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Perform inference on a single image or all images with a certain extension
(e.g., .jpg) in a folder.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import defaultdict
import argparse
import cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)
import glob
import logging
import os
import sys
import time

from caffe2.python import workspace

from detectron.core.config import assert_and_infer_cfg
from detectron.core.config import cfg
from detectron.core.config import merge_cfg_from_file
from detectron.utils.io import cache_url
from detectron.utils.logging import setup_logging
from detectron.utils.timer import Timer
import detectron.core.test_engine as infer_engine
import detectron.datasets.dummy_datasets as dummy_datasets
import detectron.utils.c2 as c2_utils
import detectron.utils.vis as vis_utils

c2_utils.import_detectron_ops()

# OpenCL may be enabled by default in OpenCV3; disable it because it's not
# thread safe and causes unwanted GPU memory allocations.
cv2.ocl.setUseOpenCL(False)


def parse_args():
    """Parse the command line for single-image / folder inference.

    Prints the usage text and exits with status 1 when the script is invoked
    without any arguments.

    Returns:
        argparse.Namespace with attributes cfg, weights, output_dir,
        image_ext, out_when_no_box, output_ext, thresh, kp_thresh and
        im_or_folder.
    """
    arg_parser = argparse.ArgumentParser(description='End-to-end inference')
    # (flags, add_argument options) pairs; registered in this order so the
    # generated help text keeps its layout.
    specs = (
        (('--cfg',), dict(
            dest='cfg',
            help='cfg model file (/path/to/model_config.yaml)',
            default=None,
            type=str,
        )),
        (('--wts',), dict(
            dest='weights',
            help='weights model file (/path/to/model_weights.pkl)',
            default=None,
            type=str,
        )),
        (('--output-dir',), dict(
            dest='output_dir',
            help='directory for visualization pdfs (default: /tmp/infer_simple)',
            default='/tmp/infer_simple',
            type=str,
        )),
        (('--image-ext',), dict(
            dest='image_ext',
            help='image file name extension (default: jpg)',
            default='jpg',
            type=str,
        )),
        (('--always-out',), dict(
            dest='out_when_no_box',
            help='output image even when no object is found',
            action='store_true',
        )),
        (('--output-ext',), dict(
            dest='output_ext',
            help='output image file format (default: pdf)',
            default='pdf',
            type=str,
        )),
        (('--thresh',), dict(
            dest='thresh',
            help='Threshold for visualizing detections',
            default=0.7,
            type=float,
        )),
        (('--kp-thresh',), dict(
            dest='kp_thresh',
            help='Threshold for visualizing keypoints',
            default=2.0,
            type=float,
        )),
        (('im_or_folder',), dict(
            help='image or folder of images',
            default=None,
        )),
    )
    for flags, options in specs:
        arg_parser.add_argument(*flags, **options)

    # A bare invocation is treated as a request for help.
    if len(sys.argv) == 1:
        arg_parser.print_help()
        sys.exit(1)
    return arg_parser.parse_args()


def main(args):
    """Run end-to-end inference on one image or on every image in a folder.

    The config file ``args.cfg`` is merged into the global config, the weights
    in ``args.weights`` are resolved through ``cache_url``, and one model is
    built and reused for all images. For each image the detections are
    computed with ``im_detect_all`` and written out by
    ``vis_utils.vis_one_image`` into ``args.output_dir``.

    Raises:
        AssertionError: if the merged config describes an RPN-only model or a
            model that needs precomputed proposals.
    """
    logger = logging.getLogger(__name__)

    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'

    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    # A directory argument expands to every file with the requested extension;
    # anything else is treated as a single image path.
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.' + args.output_ext)
        )
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        # Fresh timers per image; im_detect_all receives them via `timers`.
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers
            )
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            # The backslash is escaped explicitly: a bare '\ ' is an invalid
            # escape sequence that newer Python versions warn about. The
            # emitted text is unchanged.
            logger.info(
                ' \\ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)'
            )

        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=args.thresh,
            kp_thresh=args.kp_thresh,
            ext=args.output_ext,
            out_when_no_box=args.out_when_no_box
        )


if __name__ == '__main__':
    # Script entry point: initialize Caffe2 and logging, then run inference
    # with the parsed command-line arguments.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    setup_logging(__name__)
    args = parse_args()
    main(args)


================================================
FILE: tools/pickle_caffe_blobs.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Script for converting Caffe (<= 1.0) models into the the simple state dict
format used by Detectron. For example, this script can convert the orignal
ResNet models released by MSRA.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import numpy as np
import os
import sys

from caffe.proto import caffe_pb2
from caffe2.proto import caffe2_pb2
from caffe2.python import caffe_translator
from caffe2.python import utils
from google.protobuf import text_format

from detectron.utils.io import save_object

def parse_args():
    """Parse the command line of the Caffe-to-Detectron weight converter.

    Prints the usage text and exits with status 1 when no arguments are given.

    Returns:
        argparse.Namespace with attributes prototxt_file_name,
        caffemodel_file_name and out_file_name (each None unless supplied).
    """
    arg_parser = argparse.ArgumentParser(
        description='Dump weights from a Caffe model'
    )
    # (flag, destination attribute, help text); all three options are
    # optional strings defaulting to None.
    path_options = (
        ('--prototxt', 'prototxt_file_name',
         'Network definition prototxt file path'),
        ('--caffemodel', 'caffemodel_file_name',
         'Pretrained network weights file path'),
        ('--output', 'out_file_name',
         'Output file path'),
    )
    for flag, dest, help_text in path_options:
        arg_parser.add_argument(
            flag, dest=dest, help=help_text, default=None, type=str
        )

    # A bare invocation is treated as a request for help.
    if len(sys.argv) == 1:
        arg_parser.print_help()
        sys.exit(1)

    return arg_parser.parse_args()


def normalize_resnet_name(name):
    """Rewrite a Caffe ResNet blob name into the '<stage>_<block index>' form.

    Names that start with 'res' and do not contain 'res_' are converted; all
    other names are returned unchanged. Examples:

        res4b11_branch2c -> res4_11_branch2c
        res2a_branch1    -> res2_0_branch1
    """
    if not name.startswith('res') or 'res_' in name:
        return name

    sep = name.find('_')
    # Text between the 'res' prefix and the first underscore, e.g. '4b11'.
    stage_and_block = name[len('res'):sep]
    stage = stage_and_block[0]
    if len(stage_and_block) > 2:
        # Letter followed by a number, e.g. "b11" -> 11.
        block_index = int(stage_and_block[2:])
    else:
        # A single letter, e.g. "a" -> 0, "b" -> 1.
        block_index = ord(stage_and_block[1]) - ord('a')
    return 'res' + stage + '_' + str(block_index) + name[sep:]


def pickle_weights(out_file_name, weights):
    """Save the translated blobs as a name -> array dict and list their names.

    Each proto in ``weights.protos`` is converted with
    ``utils.Caffe2TensorToNumpyArray`` and stored under its name after
    ``normalize_resnet_name`` has been applied. The dict is written with
    ``save_object`` and the sorted names are printed.
    """
    blobs = {}
    for proto in weights.protos:
        blob_name = normalize_resnet_name(proto.name)
        blobs[blob_name] = utils.Caffe2TensorToNumpyArray(proto)
    save_object(blobs, out_file_name)
    print('Wrote blobs:')
    print(sorted(blobs))


def add_missing_biases(caffenet_weights):
    """Append an all-zero bias blob to Convolution layers that have none.

    A Convolution layer with exactly one blob is given a second blob of
    zeros, with one entry per unit of the first dimension of the existing
    blob's shape. The layers are modified in place.
    """
    for layer in caffenet_weights.layer:
        if layer.type != 'Convolution' or len(layer.blobs) != 1:
            continue
        n_out = layer.blobs[0].shape.dim[0]
        zero_bias = caffe_pb2.BlobProto()
        zero_bias.data.extend(np.zeros(n_out))
        # Stored with the legacy num/channels/height/width fields as
        # 1 x 1 x 1 x n_out.
        zero_bias.num = 1
        zero_bias.channels = 1
        zero_bias.height = 1
        zero_bias.width = n_out
        layer.blobs.extend([zero_bias])


def remove_spatial_bn_layers(caffenet, caffenet_weights):
    """Strip BatchNorm/Scale layers and return their folded parameters.

    All layers of type 'BatchNorm' or 'Scale' are removed from both protos in
    place. The removed weight layers are consumed as consecutive
    (BatchNorm, Scale) pairs, and each pair is folded into one scale tensor
    and one bias tensor:

        scale' = scale / sqrt(var + 1e-5)
        bias'  = bias - mean * scale / sqrt(var + 1e-5)

    Returns:
        A list of caffe2 TensorProto objects named 'res<suffix>_bn_s' and
        'res<suffix>_bn_b', where <suffix> is the BatchNorm layer name with
        its leading 'bn' removed.
    """
    # Layer types associated with spatial batch norm
    bn_related_types = ['BatchNorm', 'Scale']

    def _strip_bn_layers(net):
        # Walk from the back so that popping does not shift pending indices.
        idx = len(net.layer) - 1
        while idx >= 0:
            if net.layer[idx].type in bn_related_types:
                net.layer.pop(idx)
            idx -= 1

    def _as_float_tensor(values, shape, name):
        tensor = caffe2_pb2.TensorProto()
        tensor.name = name
        tensor.data_type = caffe2_pb2.TensorProto.FLOAT
        tensor.dims.extend(shape.dim)
        tensor.float_data.extend(values)
        assert len(tensor.float_data) == np.prod(tensor.dims), 'Data size, shape mismatch'
        return tensor

    # The network definition only needs the layers gone.
    _strip_bn_layers(caffenet)
    # The weight layers are collected before removal, since their parameters
    # are what gets folded below.
    bn_layers = [
        candidate for candidate in caffenet_weights.layer
        if candidate.type in bn_related_types
    ]
    _strip_bn_layers(caffenet_weights)

    folded = []
    # Even positions are read as BatchNorm layers, odd positions as the
    # Scale layer paired with them.
    for bn, scl in zip(bn_layers[0::2], bn_layers[1::2]):
        suffix = bn.name[len('bn'):]
        assert suffix == scl.name[len('scale'):], 'Pair mismatch'
        blob_out = 'res' + suffix + '_bn'
        bn_mean = np.asarray(bn.blobs[0].data)
        bn_var = np.asarray(bn.blobs[1].data)
        scale = np.asarray(scl.blobs[0].data)
        bias = np.asarray(scl.blobs[1].data)
        std = np.sqrt(bn_var + 1e-5)
        folded_scale = scale / std
        folded_bias = bias - bn_mean * scale / std
        # Both output tensors take their dims from the first BatchNorm blob.
        folded.append(
            _as_float_tensor(folded_scale, bn.blobs[0].shape, blob_out + '_s')
        )
        folded.append(
            _as_float_tensor(folded_bias, bn.blobs[0].shape, blob_out + '_b')
        )
    return folded


def remove_layers_without_parameters(caffenet, caffenet_weights):
    """Drop every layer that carries no blobs from both network protos.

    For each blob-less layer in ``caffenet_weights``, the first layer with the
    same name is removed from ``caffenet`` (a warning is printed when there is
    none, unless the name ends in '_split'), and the layer itself is removed
    from ``caffenet_weights``. Both protos are modified in place.
    """
    # Iterate back to front so that removals do not shift pending indices.
    for w_idx in reversed(range(len(caffenet_weights.layer))):
        weight_layer = caffenet_weights.layer[w_idx]
        if len(weight_layer.blobs) != 0:
            continue
        name = weight_layer.name
        # Index of the first definition layer with a matching name, if any.
        match = next(
            (d_idx for d_idx, def_layer in enumerate(caffenet.layer)
             if def_layer.name == name),
            None
        )
        if match is not None:
            caffenet.layer.pop(match)
        elif not name.endswith('_split'):
            print('Warning: layer {} not found in caffenet'.format(name))
        caffenet_weights.layer.pop(w_idx)


def normalize_shape(caffenet_weights):
    """Fill in num/channels/height/width for blobs described via shape.dim.

    A blob whose data length already equals num * channels * height * width
    is left alone. Otherwise its ``shape.dim`` is padded on the left with 1s
    to four dimensions (1-D and 2-D shapes only) and written into the four
    legacy fields.

    Raises:
        AssertionError: if the resulting shape does not have four dimensions.
    """
    for layer in caffenet_weights.layer:
        for blob in layer.blobs:
            legacy_dims = (blob.num, blob.channels, blob.height, blob.width)
            if len(blob.data) == np.prod(legacy_dims):
                continue
            dims = tuple(blob.shape.dim)
            if len(dims) == 1:
                # Handle biases
                dims = (1, 1, 1) + dims
            elif len(dims) == 2:
                # Handle InnerProduct layers
                dims = (1, 1) + dims
            assert len(dims) == 4
            blob.num, blob.channels, blob.height, blob.width = dims


def load_and_convert_caffe_model(prototxt_file_name, caffemodel_file_name):
    """Load a Caffe model from disk and translate it to Caffe2.

    Args:
        prototxt_file_name: path to the text-format network definition.
        caffemodel_file_name: path to the binary serialized weights.

    Returns:
        A (net, pretrained_weights) pair as produced by
        ``caffe_translator.TranslateModel``, with the folded batch-norm
        tensors appended to ``pretrained_weights.protos``.
    """
    caffenet = caffe_pb2.NetParameter()
    caffenet_weights = caffe_pb2.NetParameter()
    # Context managers make sure both file handles are closed.
    with open(prototxt_file_name) as prototxt_file:
        text_format.Merge(prototxt_file.read(), caffenet)
    # The caffemodel is a binary serialized protobuf, so it must be read as
    # bytes; text mode corrupts or fails to decode it on Python 3.
    with open(caffemodel_file_name, 'rb') as caffemodel_file:
        caffenet_weights.ParseFromString(caffemodel_file.read())
    # C2 conv layers currently require biases, but they are optional in C1.
    # Add zeros as biases if they are missing.
    add_missing_biases(caffenet_weights)
    # We only care about getting parameters, so remove layers w/o parameters
    remove_layers_without_parameters(caffenet, caffenet_weights)
    # BatchNorm is not implemented in the translator *and* we need to fold Scale
    # layers into the new C2 SpatialBN op, hence we remove the batch norm layers
    # and apply custom translation code
    bn_weights = remove_spatial_bn_layers(caffenet, caffenet_weights)
    # Set num, channel, height and width for blobs that use shape.dim instead
    normalize_shape(caffenet_weights)
    # Translate the rest of the model
    net, pretrained_weights = caffe_translator.TranslateModel(
        caffenet, caffenet_weights
    )
    pretrained_weights.protos.extend(bn_weights)
    return net, pretrained_weights


if __name__ == '__main__':
    # Script entry point: check that both input files exist, convert the
    # model, then pickle the resulting weights to the requested output path.
    args = parse_args()
    assert os.path.exists(args.prototxt_file_name), \
        'Prototxt file does not exist'
    assert os.path.exists(args.caffemodel_file_name), \
        'Weights file does not exist'
    # Only the weights are saved; the translated net itself is not used here.
    net, weights = load_and_convert_caffe_model(
        args.prototxt_file_name, args.caffemodel_file_name
    )
    pickle_weights(args.out_file_name, weights)


================================================
FILE: tools/reval.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Reval = re-eval. Re-evaluate saved detections."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import os
import sys

from detectron.core.config import cfg
from detectron.datasets import task_evaluation
from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import load_object
from detectron.utils.logging import setup_logging
import detectron.core.config as core_config


def parse_args():
    """Parse the command line of the re-evaluation script.

    Prints the usage text and exits with status 1 when no arguments are given.

    Returns:
        argparse.Namespace with attributes output_dir (a one-element list,
        because of nargs=1), dataset_name, matlab_eval, comp_mode and
        cfg_file.
    """
    arg_parser = argparse.ArgumentParser(description='Re-evaluate results')
    # (flags, add_argument options) pairs, registered in help-text order.
    specs = (
        (('output_dir',), dict(
            nargs=1, help='results directory', type=str,
        )),
        (('--dataset',), dict(
            dest='dataset_name',
            help='dataset to re-evaluate',
            default='voc_2007_test',
            type=str,
        )),
        (('--matlab',), dict(
            dest='matlab_eval',
            help='use matlab for evaluation',
            action='store_true',
        )),
        (('--comp',), dict(
            dest='comp_mode',
            help='competition mode',
            action='store_true',
        )),
        (('--cfg',), dict(
            dest='cfg_file',
            help='optional config file',
            default=None,
            type=str,
        )),
    )
    for flags, options in specs:
        arg_parser.add_argument(*flags, **options)

    # A bare invocation is treated as a request for help.
    if len(sys.argv) == 1:
        arg_parser.print_help()
        sys.exit(1)

    return arg_parser.parse_args()


def do_reval(dataset_name, output_dir, args):
    """Re-run evaluation on the detections saved in ``output_dir``.

    Loads ``detections.pkl`` from ``output_dir``, merges the config stored in
    it into the global config, evaluates all saved boxes / segms / keypoints
    on ``dataset_name`` and logs the results in copy-paste friendly form.

    Note: ``args.cfg_file`` is only tested for None here to choose the merge
    function; the file itself is not read in this function.
    """
    dataset = JsonDataset(dataset_name)
    detections_path = os.path.join(output_dir, 'detections.pkl')
    dets = load_object(detections_path)

    # Override config with the one saved in the detections file
    cfg_file_given = args.cfg_file is not None
    saved_cfg = core_config.load_cfg(dets['cfg'])
    if cfg_file_given:
        core_config.merge_cfg_from_cfg(saved_cfg)
    else:
        core_config._merge_a_into_b(saved_cfg, cfg)

    all_boxes = dets['all_boxes']
    all_segms = dets['all_segms']
    all_keyps = dets['all_keyps']
    results = task_evaluation.evaluate_all(
        dataset, all_boxes, all_segms, all_keyps, output_dir,
        use_matlab=args.matlab_eval
    )
    task_evaluation.log_copy_paste_friendly_results(results)


if __name__ == '__main__':
    # Script entry point.
    setup_logging(__name__)
    args = parse_args()
    if args.comp_mode:
        cfg.TEST.COMPETITION_MODE = True
    # output_dir is parsed with nargs=1, so it arrives as a one-element list.
    output_dir = os.path.abspath(args.output_dir[0])
    do_reval(args.dataset_name, output_dir, args)


================================================
FILE: tools/test_net.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Perform inference on one or more datasets."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)
import os
import pprint
import sys
import time

from caffe2.python import workspace

from detectron.core.config import assert_and_infer_cfg
from detectron.core.config import cfg
from detectron.core.config import merge_cfg_from_file
from detectron.core.config import merge_cfg_from_list
from detectron.core.test_engine import run_inference
from detectron.utils.logging import setup_logging
import detectron.utils.c2 as c2_utils

c2_utils.import_detectron_ops()

# OpenCL may be enabled by default in OpenCV3; disable it because it's not
# thread safe and causes unwanted GPU memory allocations.
cv2.ocl.setUseOpenCL(False)


def _parse_bool_flag(value):
    """Convert a command-line string into a bool.

    argparse's ``type=bool`` treats every non-empty string (including
    'False') as True, so an explicit converter is needed for a value-taking
    boolean option.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognized boolean.
    """
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, as it was under bool('').
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value (e.g. True or False), got {!r}'.format(value)
    )


def parse_args():
    """Parse the command line of the dataset inference script.

    Prints the usage text and exits with status 1 when no arguments are given.

    Returns:
        argparse.Namespace with attributes cfg_file, wait, vis,
        multi_gpu_testing, range (None or a list of two ints) and opts (the
        remaining command-line tokens).
    """
    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument(
        '--cfg',
        dest='cfg_file',
        help='optional config file',
        default=None,
        type=str
    )
    parser.add_argument(
        '--wait',
        dest='wait',
        help='wait until net file exists',
        default=True,
        # Not type=bool: that would turn '--wait False' into True.
        type=_parse_bool_flag
    )
    parser.add_argument(
        '--vis', dest='vis', help='visualize detections', action='store_true'
    )
    parser.add_argument(
        '--multi-gpu-testing',
        dest='multi_gpu_testing',
        help='using cfg.NUM_GPUS for inference',
        action='store_true'
    )
    parser.add_argument(
        '--range',
        dest='range',
        help='start (inclusive) and end (exclusive) indices',
        default=None,
        type=int,
        nargs=2
    )
    parser.add_argument(
        'opts',
        help='See detectron/core/config.py for all options',
        default=None,
        nargs=argparse.REMAINDER
    )
    # A bare invocation is treated as a request for help.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


if __name__ == '__main__':
    # Script entry point: initialize Caffe2 and logging, build the global
    # config from the optional config file plus command-line overrides, then
    # run inference with the weights named by cfg.TEST.WEIGHTS.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    logger = setup_logging(__name__)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    # The config file is merged first, so command-line opts are applied on
    # top of it.
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    logger.info('Testing with config:')
    logger.info(pprint.pformat(cfg))

    # While args.wait is truthy, poll every 10 seconds until the weights file
    # appears on disk.
    while not os.path.exists(cfg.TEST.WEIGHTS) and args.wait:
        logger.info('Waiting for \'{}\' to exist...'.format(cfg.TEST.WEIGHTS))
        time.sleep(10)

    run_inference(
        cfg.TEST.WEIGHTS,
        ind_range=args.range,
        multi_gpu_testing=args.multi_gpu_testing,
        check_expected_results=True,
    )


================================================
FILE: tools/train_net.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Train a network with Detectron."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)
import logging
import numpy as np
import pprint
import sys

from caffe2.python import workspace

from detectron.core.config import assert_and_infer_cfg
from detectron.core.config import cfg
from detectron.core.config import merge_cfg_from_file
from detectron.core.config import merge_cfg_from_list
from detectron.core.test_engine import run_inference
from detectron.utils.logging import setup_logging
import detectron.utils.c2 as c2_utils
import detectron.utils.train

c2_utils.import_contrib_ops()
c2_utils.import_detectron_ops()

# OpenCL may be enabled by default in OpenCV3; disable it because it's not
# thread safe and causes unwanted GPU memory allocations.
cv2.ocl.setUseOpenCL(False)


def parse_args():
    """Parse the command line of the training script.

    Prints the usage text and exits with status 1 when no arguments are given.

    Returns:
        argparse.Namespace with attributes cfg_file, multi_gpu_testing,
        skip_test and opts (the remaining command-line tokens).
    """
    arg_parser = argparse.ArgumentParser(
        description='Train a network with Detectron'
    )
    # (flags, add_argument options) pairs, registered in help-text order.
    specs = (
        (('--cfg',), dict(
            dest='cfg_file',
            help='Config file for training (and optionally testing)',
            default=None,
            type=str,
        )),
        (('--multi-gpu-testing',), dict(
            dest='multi_gpu_testing',
            help='Use cfg.NUM_GPUS GPUs for inference',
            action='store_true',
        )),
        (('--skip-test',), dict(
            dest='skip_test',
            help='Do not test the final model',
            action='store_true',
        )),
        (('opts',), dict(
            help='See detectron/core/config.py for all options',
            default=None,
            nargs=argparse.REMAINDER,
        )),
    )
    for flags, options in specs:
        arg_parser.add_argument(*flags, **options)

    # A bare invocation is treated as a request for help.
    if len(sys.argv) == 1:
        arg_parser.print_help()
        sys.exit(1)
    return arg_parser.parse_args()


def main():
    """Train a model as configured and, unless skipped, test the final one.

    Initializes Caffe2, builds the global config from the optional config
    file plus command-line overrides, logs the CUDA / cuDNN / nvidia-smi
    information and the full config, seeds numpy, and runs training. The
    'final' checkpoint is then passed to ``test_model`` unless --skip-test
    was given.
    """
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1']
    )

    # Set up logging and load config options
    log = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    log.info('Called with args:')
    log.info(args)

    # The config file is merged first, so command-line opts override it.
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()

    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    environment_report = (
        ("cuda version : {}", cuda_ver),
        ("cudnn version: {}", cudnn_ver),
        ("nvidia-smi output:\n{}", smi_output),
    )
    for template, value in environment_report:
        log.info(template.format(value))
    log.info('Training with config:')
    log.info(pprint.pformat(cfg))

    # Seeding numpy does not make training deterministic in general: some
    # sources of non-determinism (such as certain non-deterministic cudnn
    # functions) cannot be removed with a reasonable execution-speed tradeoff.
    np.random.seed(cfg.RNG_SEED)

    # Execute the training run
    checkpoints = detectron.utils.train.train_model()

    # Test the trained model
    if args.skip_test:
        return
    test_model(checkpoints['final'], args.multi_gpu_testing, args.opts)


def test_model(model_file, multi_gpu_testing, opts=None):
    """Run inference with ``model_file`` after resetting the Caffe2 workspace.

    ``opts`` is accepted for the caller's convenience but is not used here.
    """
    # Clear memory before inference
    workspace.ResetWorkspace()
    # Run inference
    inference_options = dict(
        multi_gpu_testing=multi_gpu_testing,
        check_expected_results=True,
    )
    run_inference(model_file, **inference_options)


if __name__ == '__main__':
    # Script entry point; main() parses the command line itself.
    main()


================================================
FILE: tools/visualize_results.py
================================================
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Script for visualizing results saved in a detections.pkl file."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import cv2
import os
import sys

from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import load_object
import detectron.utils.vis as vis_utils

# OpenCL may be enabled by default in OpenCV3; disable it because it's not
# thread safe and causes unwanted GPU memory allocations.
cv2.ocl.setUseOpenCL(False)


def parse_args():
    """Parse the command line of the detection visualization script.

    Prints the usage text and exits with status 1 when no arguments are given.

    Returns:
        argparse.Namespace with attributes dataset, detections, thresh,
        output_dir and first.
    """
    arg_parser = argparse.ArgumentParser()
    # (flag, destination, help text, default, type); every option takes
    # exactly one value.
    value_options = (
        ('--dataset', 'dataset', 'dataset', 'coco_2014_minival', str),
        ('--detections', 'detections', 'detections pkl file', '', str),
        ('--thresh', 'thresh', 'detection prob threshold', 0.9, float),
        ('--output-dir', 'output_dir', 'output directory',
         './tmp/vis-output', str),
        ('--first', 'first', 'only visualize the first k images', 0, int),
    )
    for flag, dest, help_text, default, value_type in value_options:
        arg_parser.add_argument(
            flag, dest=dest, help=help_text, default=default, type=value_type
        )

    # A bare invocation is treated as a request for help.
    if len(sys.argv) == 1:
        arg_parser.print_help()
        sys.exit(1)
    return arg_parser.parse_args()


def vis(dataset, detections_pkl, thresh, output_dir, limit=0):
    """Render saved detections for the images of a dataset.

    Args:
        dataset: dataset name passed to JsonDataset.
        detections_pkl: path to a pickle containing the keys 'all_boxes',
            'all_segms' and 'all_keyps'.
        thresh: threshold forwarded to vis_one_image.
        output_dir: visualizations are written to '<output_dir>/vis'.
        limit: when positive, only the first ``limit`` images are processed.

    Raises:
        AssertionError: if the pickle lacks one of the expected keys.
    """
    ds = JsonDataset(dataset)
    roidb = ds.get_roidb()

    dets = load_object(detections_pkl)

    required_keys = ['all_boxes', 'all_segms', 'all_keyps']
    assert all(key in dets for key in required_keys), \
        'Expected detections pkl file in the format used by test_engine.py'

    all_boxes, all_segms, all_keyps = (dets[key] for key in required_keys)

    def _select_image(per_class, image_ix):
        # Empty per-class entries are passed through unchanged; all others
        # are indexed by image.
        return [
            entries if len(entries) == 0 else entries[image_ix]
            for entries in per_class
        ]

    for ix, entry in enumerate(roidb):
        if limit > 0 and ix >= limit:
            break
        # Progress report every tenth image.
        if ix % 10 == 0:
            print('{:d}/{:d}'.format(ix + 1, len(roidb)))

        image_path = entry['image']
        im = cv2.imread(image_path)
        im_name = os.path.splitext(os.path.basename(image_path))[0]

        vis_utils.vis_one_image(
            im[:, :, ::-1],  # reverse the channel axis before visualization
            '{:d}_{:s}'.format(ix, im_name),
            os.path.join(output_dir, 'vis'),
            _select_image(all_boxes, ix),
            segms=_select_image(all_segms, ix),
            keypoints=_select_image(all_keyps, ix),
            thresh=thresh,
            box_alpha=0.8,
            dataset=ds,
            show_class=True
        )


if __name__ == '__main__':
    # Script entry point: forward the parsed options to vis(); --first maps
    # onto the `limit` parameter.
    opts = parse_args()
    vis(
        opts.dataset,
        opts.detections,
        opts.thresh,
        opts.output_dir,
        limit=opts.first
    )