[
  {
    "path": ".github/issue_template.md",
    "content": "## PLEASE FOLLOW THESE INSTRUCTIONS BEFORE POSTING\n1. Please thoroughly read README.md, INSTALL.md, GETTING_STARTED.md, and FAQ.md\n2. Please search existing *open and closed* issues in case your issue has already been reported\n3. Please try to debug the issue in case you can solve it on your own before posting\n\n## After following steps 1-3 above and agreeing to provide the detailed information requested below, you may continue with posting your issue\n(**Delete this line and the text above it.**)\n\n### Expected results\n\nWhat did you expect to see?\n\n### Actual results\n\nWhat did you observe instead?\n\n### Detailed steps to reproduce\n\nE.g.:\n\n```\nThe command that you ran\n```\n\n### System information\n\n* Operating system: ?\n* Compiler version: ?\n* CUDA version: ?\n* cuDNN version: ?\n* NVIDIA driver version: ?\n* GPU models (for all devices if they are not all the same): ?\n* `PYTHONPATH` environment variable: ?\n* `python --version` output: ?\n* Anything else that seems relevant: ?\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# Shared objects\n*.so\n\n# Distribution / packaging\nbuild/\n*.egg-info/\n*.egg\n\n# Temporary files\n*.swn\n*.swo\n*.swp\n\n# Dataset symlinks\ndetectron/datasets/data/*\n!detectron/datasets/data/README.md\n\n# Generated C files\ndetectron/utils/cython_*.c\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)\n\n# Find the Caffe2 package.\n# Caffe2 exports the required targets, so find_package should work for\n# the standard Caffe2 installation. If you encounter problems with finding\n# the Caffe2 package, make sure you have run `make install` when installing\n# Caffe2 (`make install` populates your share/cmake/Caffe2).\nfind_package(Caffe2 REQUIRED)\n\nif (${CAFFE2_VERSION} VERSION_LESS 0.8.2)\n  # Pre-0.8.2 caffe2 does not have proper interface libraries set up, so we\n  # will rely on the old path.\n  message(WARNING\n      \"You are using an older version of Caffe2 (version \" ${CAFFE2_VERSION}\n      \"). Please consider moving to a newer version.\")\n  include(cmake/legacy/legacymake.cmake)\n  return()\nendif()\n\n# Add compiler flags.\nset(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -std=c11\")\nset(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC -Wno-narrowing\")\n\n# Print configuration summary.\ninclude(cmake/Summary.cmake)\ndetectron_print_config_summary()\n\n# Collect custom ops sources.\nfile(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cc)\nfile(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cu)\n\n# Install custom CPU ops lib.\nadd_library(\n    caffe2_detectron_custom_ops SHARED\n    ${CUSTOM_OPS_CPU_SRCS})\n\ntarget_include_directories(\n    caffe2_detectron_custom_ops PRIVATE\n    ${CAFFE2_INCLUDE_DIRS})\n\ntarget_link_libraries(caffe2_detectron_custom_ops caffe2_library)\ninstall(TARGETS caffe2_detectron_custom_ops DESTINATION lib)\n\n# Install custom GPU ops lib, if gpu is present.\nif (CAFFE2_USE_CUDA OR CAFFE2_FOUND_CUDA)\n  # Additional -I prefix is required for CMake versions before commit (< 3.7):\n  # https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594\n  list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS})\n  CUDA_ADD_LIBRARY(\n      caffe2_detectron_custom_ops_gpu SHARED\n      
${CUSTOM_OPS_CPU_SRCS}\n      ${CUSTOM_OPS_GPU_SRCS})\n\n  target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu_library)\n  install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib)\nendif()\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as\ncontributors and maintainers pledge to make participation in our project and\nour community a harassment-free experience for everyone, regardless of age, body\nsize, disability, ethnicity, sex characteristics, gender identity and expression,\nlevel of experience, education, socio-economic status, nationality, personal\nappearance, race, religion, or sexual identity and orientation.\n\n## Our Standards\n\nExamples of behavior that contributes to creating a positive environment\ninclude:\n\n* Using welcoming and inclusive language\n* Being respectful of differing viewpoints and experiences\n* Gracefully accepting constructive criticism\n* Focusing on what is best for the community\n* Showing empathy towards other community members\n\nExamples of unacceptable behavior by participants include:\n\n* The use of sexualized language or imagery and unwelcome sexual attention or\n  advances\n* Trolling, insulting/derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or electronic\n  address, without explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Our Responsibilities\n\nProject maintainers are responsible for clarifying the standards of acceptable\nbehavior and are expected to take appropriate and fair corrective action in\nresponse to any instances of unacceptable behavior.\n\nProject maintainers have the right and responsibility to remove, edit, or\nreject comments, commits, code, wiki edits, issues, and other contributions\nthat are not aligned to this Code of Conduct, or to ban temporarily or\npermanently any contributor for other behaviors that they deem inappropriate,\nthreatening, offensive, or harmful.\n\n## Scope\n\nThis Code of Conduct applies within all project spaces, and 
it also applies when\nan individual is representing the project or its community in public spaces.\nExamples of representing a project or community include using an official\nproject e-mail address, posting via an official social media account, or acting\nas an appointed representative at an online or offline event. Representation of\na project may be further defined and clarified by project maintainers.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported by contacting the project team at <opensource-conduct@fb.com>. All\ncomplaints will be reviewed and investigated and will result in a response that\nis deemed necessary and appropriate to the circumstances. The project team is\nobligated to maintain confidentiality with regard to the reporter of an incident.\nFurther details of specific enforcement policies may be posted separately.\n\nProject maintainers who do not follow or enforce the Code of Conduct in good\nfaith may face temporary or permanent repercussions as determined by other\nmembers of the project's leadership.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,\navailable at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html\n\n[homepage]: https://www.contributor-covenant.org\n\nFor answers to common questions about this code of conduct, see\nhttps://www.contributor-covenant.org/faq\n\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to Detectron\nWe want to make contributing to this project as easy and transparent as\npossible.\n\n## Our Development Process\nMinor changes and improvements will be released on an ongoing basis. Larger\nchanges (e.g., changesets implementing a new paper) will be released on a more\nperiodic basis.\n\n## Pull Requests\nWe actively welcome your pull requests.\n\n1. Fork the repo and create your branch from `master`.\n2. If you've added code that should be tested, add tests.\n3. If you've changed APIs, update the documentation.\n4. Ensure the test suite passes.\n5. Make sure your code lints.\n6. Ensure no regressions in baseline model speed and accuracy.\n7. If you haven't already, complete the Contributor License Agreement (\"CLA\").\n\n## Contributor License Agreement (\"CLA\")\nIn order to accept your pull request, we need you to submit a CLA. You only need\nto do this once to work on any of Facebook's open source projects.\n\nComplete your CLA here: <https://code.facebook.com/cla>\n\n## Issues\nGitHub issues will be largely unattended and are mainly intended as a community\nforum for collectively debugging issues, hopefully leading to pull requests with\nfixes when appropriate.\n\n## Coding Style  \n* 4 spaces for indentation rather than tabs\n* 80 character line length\n* PEP8 formatting\n\n## License\nBy contributing to Detectron, you agree that your contributions will be licensed\nunder the LICENSE file in the root directory of this source tree.\n"
  },
  {
    "path": "FAQ.md",
    "content": "# FAQ\n\nThis document covers frequently asked questions.\n\n- For general information about Detectron, please see [`README.md`](README.md).\n- For installation instructions, please see [`INSTALL.md`](INSTALL.md).\n- For a quick getting started guide, please see [`GETTING_STARTED.md`](GETTING_STARTED.md).\n\n#### Q: How do I compute validation AP during training?\n\n**A:** Detectron does not compute validation statistics (e.g., AP) during training because this slows training. Instead, we've implemented a \"validation monitor\", which is a process that polls for new model checkpoints saved by a training job and when one is found performs inference with it by scheduling a job with `tools/test_net.py` asynchronously using free GPUs in our cluster. We have not released the validation monitor because (1) it's a relatively thin wrapper on top of `tools/train_net.py` and (2) the little code that comprises it is specific to our cluster and would not be generally useful.\n\n#### Q: How do I restrict Detectron to use only a subset of the GPUs on a server?\n\n**A:** Don't modify the code; use the [`CUDA_VISIBLE_DEVICES`](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) environment variable instead.\n\n#### Q: Detection on one image is really slow compared to the reported performance, why?\n\nA: Various algorithms and caches (e.g., from `cudnn`) take some time to warm up. Peak inference performance will not be reached until after a few images have been processed.\n\nAlso potentially relevant: inference with Mask R-CNN on high-resolution images may be slow simply because substantial time is spent upsampling the predicted masks to the original image resolution (this has not been optimized). You can diagnose this issue if the `misc_mask` time reported by `tools/infer_simple.py` is high (e.g., much more than 20-90ms). 
The solution is to first resize your images such that the short side is around 600-800px (the exact choice does not matter) and then run inference on the resized image.\n\n\n#### Q: How do I implement a custom Caffe2 CPU or GPU operator for use in Detectron?\n\n**A:** Detectron uses a number of specialized Caffe2 operators that are distributed via the [Caffe2 Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron) as part of the core Caffe2 GitHub repository. If you'd like to implement a custom Caffe2 operator for your project, we have written a toy example illustrating how to add an operator under the Detectron source tree; please see [`detectron/ops/zero_even_op.*`](detectron/ops/) and [`detectron/tests/test_zero_even_op.py`](detectron/tests/test_zero_even_op.py). For more background on writing Caffe2 operators please consult the [Caffe2 documentation](https://caffe2.ai/docs/custom-operators.html).\n\n#### Q: How do I use Detectron to train a model on a custom dataset?\n\n**A:** If possible, we strongly recommend that you first convert the custom dataset annotation format to the [COCO API json format](http://cocodataset.org/#download). Then, add your dataset to the [dataset catalog](detectron/datasets/dataset_catalog.py) so that Detectron can use it for training and inference. If your dataset cannot be converted to the COCO API json format, then it's likely that more significant code modifications will be required. If the dataset you're adding is popular, please consider making the converted annotations publicly available; if code modifications are required, please consider submitting a pull request.\n"
  },
  {
    "path": "GETTING_STARTED.md",
    "content": "# Using Detectron\n\nThis document provides brief tutorials covering Detectron for inference and training on the COCO dataset.\n\n- For general information about Detectron, please see [`README.md`](README.md).\n- For installation instructions, please see [`INSTALL.md`](INSTALL.md).\n\n## Inference with Pretrained Models\n\n#### 1. Directory of Image Files\nTo run inference on a directory of image files (`demo/*.jpg` in this example), you can use the `infer_simple.py` tool. In this example, we're using an end-to-end trained Mask R-CNN model with a ResNet-101-FPN backbone from the model zoo:\n\n```\npython tools/infer_simple.py \\\n    --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \\\n    --output-dir /tmp/detectron-visualizations \\\n    --image-ext jpg \\\n    --wts https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \\\n    demo\n```\n\nDetectron should automatically download the model from the URL specified by the `--wts` argument. This tool will output visualizations of the detections in PDF format in the directory specified by `--output-dir`. Here's an example of the output you should expect to see (for copyright information about the demo images see [`demo/NOTICE`](demo/NOTICE)).\n\n<div align=\"center\">\n  <img src=\"demo/output/17790319373_bd19b24cfc_k_example_output.jpg\" width=\"700px\" />\n  <p>Example Mask R-CNN output.</p>\n</div>\n\n**Notes:**\n\n- When running inference on your own high-resolution images, Mask R-CNN may be slow simply because substantial time is spent upsampling the predicted masks to the original image resolution (this has not been optimized). You can diagnose this issue if the `misc_mask` time reported by `tools/infer_simple.py` is high (e.g., much more than 20-90ms). 
The solution is to first resize your images such that the short side is around 600-800px (the exact choice does not matter) and then run inference on the resized image.\n\n\n#### 2. COCO Dataset\nThis example shows how to run an end-to-end trained Mask R-CNN model from the model zoo using a single GPU for inference. As configured, this will run inference on all images in `coco_2014_minival` (which must be properly installed).\n\n```\npython tools/test_net.py \\\n    --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \\\n    TEST.WEIGHTS https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \\\n    NUM_GPUS 1\n```\n\nRunning inference with the same model using `$N` GPUs (e.g., `N=8`).\n\n```\npython tools/test_net.py \\\n    --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \\\n    --multi-gpu-testing \\\n    TEST.WEIGHTS https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \\\n    NUM_GPUS $N\n```\n\nOn an NVIDIA Tesla P100 GPU, inference should take about 130-140 ms per image for this example.\n\n\n## Training a Model with Detectron\n\nThis is a tiny tutorial showing how to train a model on COCO. The model will be an end-to-end trained Faster R-CNN using a ResNet-50-FPN backbone. For the purpose of this tutorial, we'll use a short training schedule and a small input image size so that training and inference will be relatively fast. As a result, the box AP on COCO will be relatively low compared to our [baselines](MODEL_ZOO.md). This example is provided for instructive purposes only (i.e., not for comparing against publications).\n\n#### 1. 
Training with 1 GPU\n\n```\npython tools/train_net.py \\\n    --cfg configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml \\\n    OUTPUT_DIR /tmp/detectron-output\n```\n\n**Expected results:**\n\n- Output (models, validation set detections, etc.) will be saved under `/tmp/detectron-output`\n- On a Maxwell generation GPU (e.g., M40), training should take around 4.2 hours\n- Inference time should be around 80ms / image (also on an M40)\n- Box AP on `coco_2014_minival` should be around 22.1% (+/- 0.1% stdev measured over 3 runs)\n\n#### 2. Multi-GPU Training\n\nWe've also provided configs to illustrate training with 2, 4, and 8 GPUs using learning schedules that will be approximately equivalent to the one used with 1 GPU above. The configs are located at: `configs/getting_started/tutorial_{2,4,8}gpu_e2e_faster_rcnn_R-50-FPN.yaml`. For example, launching a training job with 2 GPUs will look like this:\n\n```\npython tools/train_net.py \\\n    --multi-gpu-testing \\\n    --cfg configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml \\\n    OUTPUT_DIR /tmp/detectron-output\n```\n\nNote that we've also added the `--multi-gpu-testing` flag to instruct Detectron to parallelize inference over multiple GPUs (2 in this example; see `NUM_GPUS` in the config file) after training has finished.\n\n**Expected results:**\n\n- Training should take around 2.3 hours (2 x M40)\n- Inference time should be around 80ms / image (but in parallel on 2 GPUs, so half the total time)\n- Box AP on `coco_2014_minival` should be around 22.1% (+/- 0.1% stdev measured over 3 runs)\n\nTo understand how learning schedules are adjusted (the \"linear scaling rule\"), please study these tutorial config files and read our paper [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677). **Aside from this tutorial, all of our released configs make use of 8 GPUs. 
If you will be using fewer than 8 GPUs for training (or do anything else that changes the minibatch size), it is essential that you understand how to manipulate training schedules according to the linear scaling rule.**\n\n**Notes:**\n\n- This training example uses a relatively low GPU-compute model and thus overhead from Caffe2 Python ops is relatively high. As a result, scaling as the number of GPUs is increased from 2 to 8 is relatively poor (e.g., training with 8 GPUs takes about 0.9 hours, only 4.5x faster than with 1 GPU). As larger, more GPU-compute heavy models are used, the scaling improves.\n"
  },
  {
    "path": "INSTALL.md",
    "content": "# Installing Detectron\n\nThis document covers how to install Detectron, its dependencies (including Caffe2), and the COCO dataset.\n\n- For general information about Detectron, please see [`README.md`](README.md).\n\n**Requirements:**\n\n- NVIDIA GPU, Linux, Python2\n- Caffe2, various standard Python packages, and the COCO API; Instructions for installing these dependencies are found below\n\n**Notes:**\n\n- Detectron operators currently do not have CPU implementation; a GPU system is required.\n- Detectron has been tested extensively with CUDA 8.0 and cuDNN 6.0.21.\n\n## Caffe2\n\nTo install Caffe2 with CUDA support, follow the [installation instructions](https://caffe2.ai/docs/getting-started.html) from the [Caffe2 website](https://caffe2.ai/). **If you already have Caffe2 installed, make sure to update your Caffe2 to a version that includes the [Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron).**\n\nPlease ensure that your Caffe2 installation was successful before proceeding by running the following commands and checking their output as directed in the comments.\n\n```\n# To check if Caffe2 build was successful\npython -c 'from caffe2.python import core' 2>/dev/null && echo \"Success\" || echo \"Failure\"\n\n# To check if Caffe2 GPU build was successful\n# This must print a number > 0 in order to use Detectron\npython -c 'from caffe2.python import workspace; print(workspace.NumCudaDevices())'\n```\n\nIf the `caffe2` Python package is not found, you likely need to adjust your `PYTHONPATH` environment variable to include its location (`/path/to/caffe2/build`, where `build` is the Caffe2 CMake build directory).\n\n## Other Dependencies\n\nInstall the [COCO API](https://github.com/cocodataset/cocoapi):\n\n```\n# COCOAPI=/path/to/clone/cocoapi\ngit clone https://github.com/cocodataset/cocoapi.git $COCOAPI\ncd $COCOAPI/PythonAPI\n# Install into global site-packages\nmake install\n# Alternatively, if you do not have 
permissions or prefer\n# not to install the COCO API into global site-packages\npython setup.py install --user\n```\n\nNote that instructions like `# COCOAPI=/path/to/clone/cocoapi` indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (`COCOAPI` in this case) accordingly.\n\n## Detectron\n\nClone the Detectron repository:\n\n```\n# DETECTRON=/path/to/clone/detectron\ngit clone https://github.com/facebookresearch/detectron $DETECTRON\n```\n\nInstall Python dependencies:\n\n```\npip install -r $DETECTRON/requirements.txt\n```\n\nSet up Python modules:\n\n```\ncd $DETECTRON && make\n```\n\nCheck that Detectron tests pass (e.g. for [`SpatialNarrowAsOp test`](detectron/tests/test_spatial_narrow_as_op.py)):\n\n```\npython $DETECTRON/detectron/tests/test_spatial_narrow_as_op.py\n```\n\n## That's All You Need for Inference\n\nAt this point, you can run inference using pretrained Detectron models. Take a look at our [inference tutorial](GETTING_STARTED.md) for an example. If you want to train models on the COCO dataset, then please continue with the installation instructions.\n\n## Datasets\n\nDetectron finds datasets via symlinks from `detectron/datasets/data` to the actual locations where the dataset images and annotations are stored. For instructions on how to create symlinks for COCO and other datasets, please see [`detectron/datasets/data/README.md`](detectron/datasets/data/README.md).\n\nAfter symlinks have been created, that's all you need to start training models.\n\n## Advanced Topic: Custom Operators for New Research Projects\n\nPlease read the custom operators section of the [`FAQ`](FAQ.md) first.\n\nFor convenience, we provide CMake support for building custom operators. 
All custom operators are built into a single library that can be loaded dynamically from Python.\nPlace your custom operator implementation under [`detectron/ops/`](detectron/ops/) and see [`detectron/tests/test_zero_even_op.py`](detectron/tests/test_zero_even_op.py) for an example of how to load custom operators from Python.\n\nBuild the custom operators library:\n\n```\ncd $DETECTRON && make ops\n```\n\nCheck that the custom operator tests pass:\n\n```\npython $DETECTRON/detectron/tests/test_zero_even_op.py\n```\n\n## Docker Image\n\nWe provide a [`Dockerfile`](docker/Dockerfile) that you can use to build a Detectron image on top of a Caffe2 image that satisfies the requirements outlined at the top. If you would like to use a Caffe2 image different from the one we use by default, please make sure that it includes the [Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron).\n\nBuild the image:\n\n```\ncd $DETECTRON/docker\ndocker build -t detectron:c2-cuda9-cudnn7 .\n```\n\nRun the image (e.g. for [`BatchPermutationOp test`](detectron/tests/test_batch_permutation_op.py)):\n\n```\nnvidia-docker run --rm -it detectron:c2-cuda9-cudnn7 python detectron/tests/test_batch_permutation_op.py\n```\n\n## Troubleshooting\n\nIn case of Caffe2 installation problems, please read the troubleshooting section of the relevant Caffe2 [installation instructions](https://caffe2.ai/docs/getting-started.html) first. 
In the following, we provide additional troubleshooting tips for Caffe2 and Detectron.\n\n### Caffe2 Operator Profiling\n\nCaffe2 comes with performance [`profiling`](https://github.com/pytorch/pytorch/tree/master/caffe2/contrib/prof)\nsupport which you may find useful for benchmarking or debugging your operators\n(see [`BatchPermutationOp test`](detectron/tests/test_batch_permutation_op.py) for example usage).\nProfiling support is not built by default and you can enable it by setting\nthe `-DUSE_PROF=ON` flag when running Caffe2 CMake.\n\n### CMake Cannot Find CUDA and cuDNN\n\nSometimes CMake has trouble with finding CUDA and cuDNN dirs on your machine.\n\nWhen building Caffe2, you can point CMake to CUDA and cuDNN dirs by running:\n\n```\ncmake .. \\\n  # insert your Caffe2 CMake flags here\n  -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda/toolkit/dir \\\n  -DCUDNN_ROOT_DIR=/path/to/cudnn/root/dir\n```\n\nSimilarly, when building custom Detectron operators you can use:\n\n```\ncd $DETECTRON\nmkdir -p build && cd build\ncmake .. \\\n  -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda/toolkit/dir \\\n  -DCUDNN_ROOT_DIR=/path/to/cudnn/root/dir\nmake\n```\n\nNote that you can use the same commands to get CMake to use specific versions of CUDA and cuDNN out of possibly multiple versions installed on your machine.\n\n### Protobuf Errors\n\nCaffe2 uses protobuf as its serialization format and requires version `3.2.0` or newer.\nIf your protobuf version is older, you can build protobuf from Caffe2 protobuf submodule and use that version instead.\n\nTo build Caffe2 protobuf submodule:\n\n```\n# CAFFE2=/path/to/caffe2\ncd $CAFFE2/third_party/protobuf/cmake\nmkdir -p build && cd build\ncmake .. \\\n  -DCMAKE_INSTALL_PREFIX=$HOME/c2_tp_protobuf \\\n  -Dprotobuf_BUILD_TESTS=OFF \\\n  -DCMAKE_CXX_FLAGS=\"-fPIC\"\nmake install\n```\n\nTo point Caffe2 CMake to the newly built protobuf:\n\n```\ncmake .. 
\\\n  # insert your Caffe2 CMake flags here\n  -DPROTOBUF_PROTOC_EXECUTABLE=$HOME/c2_tp_protobuf/bin/protoc \\\n  -DPROTOBUF_INCLUDE_DIR=$HOME/c2_tp_protobuf/include \\\n  -DPROTOBUF_LIBRARY=$HOME/c2_tp_protobuf/lib64/libprotobuf.a\n```\n\nYou may also experience problems with protobuf if you have both system and anaconda packages installed.\nThis could lead to problems as the versions could be mixed at compile time or at runtime.\nThis issue can also be overcome by following the commands from above.\n\n### Caffe2 Python Binaries\n\nIn case you experience issues with CMake being unable to find the required Python paths when\nbuilding Caffe2 Python binaries (e.g. in virtualenv), you can try pointing Caffe2 CMake to python\nlibrary and include dir by using:\n\n```\ncmake .. \\\n  # insert your Caffe2 CMake flags here\n  -DPYTHON_LIBRARY=$(python -c \"from distutils import sysconfig; print(sysconfig.get_python_lib())\") \\\n  -DPYTHON_INCLUDE_DIR=$(python -c \"from distutils import sysconfig; print(sysconfig.get_python_inc())\")\n```\n\n### Caffe2 with NNPACK Build\n\nDetectron does not require Caffe2 built with NNPACK support. If you face NNPACK related issues during Caffe2 installation, you can safely disable NNPACK by setting the `-DUSE_NNPACK=OFF` CMake flag.\n\n### Caffe2 with OpenCV Build\n\nAnalogously to the NNPACK case above, you can disable OpenCV by setting the `-DUSE_OPENCV=OFF` CMake flag.\n\n### COCO API Undefined Symbol Error\n\nIf you encounter a COCO API import error due to an undefined symbol, as reported [here](https://github.com/cocodataset/cocoapi/issues/35),\nmake sure that your python versions are not getting mixed. 
For instance, this issue may arise if you have\n[both system and conda numpy installed](https://stackoverflow.com/questions/36190757/numpy-undefined-symbol-pyfpe-jbuf).\n\n### CMake Cannot Find Caffe2\n\nIn case you experience issues with CMake being unable to find the Caffe2 package when building custom operators,\nmake sure you have run `make install` as part of your Caffe2 installation process.\n"
  },
  {
    "path": "LICENSE",
    "content": "Apache License\nVersion 2.0, January 2004\nhttp://www.apache.org/licenses/\n\nTERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n1. Definitions.\n\n\"License\" shall mean the terms and conditions for use, reproduction,\nand distribution as defined by Sections 1 through 9 of this document.\n\n\"Licensor\" shall mean the copyright owner or entity authorized by\nthe copyright owner that is granting the License.\n\n\"Legal Entity\" shall mean the union of the acting entity and all\nother entities that control, are controlled by, or are under common\ncontrol with that entity. For the purposes of this definition,\n\"control\" means (i) the power, direct or indirect, to cause the\ndirection or management of such entity, whether by contract or\notherwise, or (ii) ownership of fifty percent (50%) or more of the\noutstanding shares, or (iii) beneficial ownership of such entity.\n\n\"You\" (or \"Your\") shall mean an individual or Legal Entity\nexercising permissions granted by this License.\n\n\"Source\" form shall mean the preferred form for making modifications,\nincluding but not limited to software source code, documentation\nsource, and configuration files.\n\n\"Object\" form shall mean any form resulting from mechanical\ntransformation or translation of a Source form, including but\nnot limited to compiled object code, generated documentation,\nand conversions to other media types.\n\n\"Work\" shall mean the work of authorship, whether in Source or\nObject form, made available under the License, as indicated by a\ncopyright notice that is included in or attached to the work\n(an example is provided in the Appendix below).\n\n\"Derivative Works\" shall mean any work, whether in Source or Object\nform, that is based on (or derived from) the Work and for which the\neditorial revisions, annotations, elaborations, or other modifications\nrepresent, as a whole, an original work of authorship. 
For the purposes\nof this License, Derivative Works shall not include works that remain\nseparable from, or merely link (or bind by name) to the interfaces of,\nthe Work and Derivative Works thereof.\n\n\"Contribution\" shall mean any work of authorship, including\nthe original version of the Work and any modifications or additions\nto that Work or Derivative Works thereof, that is intentionally\nsubmitted to Licensor for inclusion in the Work by the copyright owner\nor by an individual or Legal Entity authorized to submit on behalf of\nthe copyright owner. For the purposes of this definition, \"submitted\"\nmeans any form of electronic, verbal, or written communication sent\nto the Licensor or its representatives, including but not limited to\ncommunication on electronic mailing lists, source code control systems,\nand issue tracking systems that are managed by, or on behalf of, the\nLicensor for the purpose of discussing and improving the Work, but\nexcluding communication that is conspicuously marked or otherwise\ndesignated in writing by the copyright owner as \"Not a Contribution.\"\n\n\"Contributor\" shall mean Licensor and any individual or Legal Entity\non behalf of whom a Contribution has been received by Licensor and\nsubsequently incorporated within the Work.\n\n2. Grant of Copyright License. Subject to the terms and conditions of\nthis License, each Contributor hereby grants to You a perpetual,\nworldwide, non-exclusive, no-charge, royalty-free, irrevocable\ncopyright license to reproduce, prepare Derivative Works of,\npublicly display, publicly perform, sublicense, and distribute the\nWork and such Derivative Works in Source or Object form.\n\n3. Grant of Patent License. 
Subject to the terms and conditions of\nthis License, each Contributor hereby grants to You a perpetual,\nworldwide, non-exclusive, no-charge, royalty-free, irrevocable\n(except as stated in this section) patent license to make, have made,\nuse, offer to sell, sell, import, and otherwise transfer the Work,\nwhere such license applies only to those patent claims licensable\nby such Contributor that are necessarily infringed by their\nContribution(s) alone or by combination of their Contribution(s)\nwith the Work to which such Contribution(s) was submitted. If You\ninstitute patent litigation against any entity (including a\ncross-claim or counterclaim in a lawsuit) alleging that the Work\nor a Contribution incorporated within the Work constitutes direct\nor contributory patent infringement, then any patent licenses\ngranted to You under this License for that Work shall terminate\nas of the date such litigation is filed.\n\n4. Redistribution. You may reproduce and distribute copies of the\nWork or Derivative Works thereof in any medium, with or without\nmodifications, and in Source or Object form, provided that You\nmeet the following conditions:\n\n(a) You must give any other recipients of the Work or\nDerivative Works a copy of this License; and\n\n(b) You must cause any modified files to carry prominent notices\nstating that You changed the files; and\n\n(c) You must retain, in the Source form of any Derivative Works\nthat You distribute, all copyright, patent, trademark, and\nattribution notices from the Source form of the Work,\nexcluding those notices that do not pertain to any part of\nthe Derivative Works; and\n\n(d) If the Work includes a \"NOTICE\" text file as part of its\ndistribution, then any Derivative Works that You distribute must\ninclude a readable copy of the attribution notices contained\nwithin such NOTICE file, excluding those notices that do not\npertain to any part of the Derivative Works, in at least one\nof the following places: within a 
NOTICE text file distributed\nas part of the Derivative Works; within the Source form or\ndocumentation, if provided along with the Derivative Works; or,\nwithin a display generated by the Derivative Works, if and\nwherever such third-party notices normally appear. The contents\nof the NOTICE file are for informational purposes only and\ndo not modify the License. You may add Your own attribution\nnotices within Derivative Works that You distribute, alongside\nor as an addendum to the NOTICE text from the Work, provided\nthat such additional attribution notices cannot be construed\nas modifying the License.\n\nYou may add Your own copyright statement to Your modifications and\nmay provide additional or different license terms and conditions\nfor use, reproduction, or distribution of Your modifications, or\nfor any such Derivative Works as a whole, provided Your use,\nreproduction, and distribution of the Work otherwise complies with\nthe conditions stated in this License.\n\n5. Submission of Contributions. Unless You explicitly state otherwise,\nany Contribution intentionally submitted for inclusion in the Work\nby You to the Licensor shall be under the terms and conditions of\nthis License, without any additional terms or conditions.\nNotwithstanding the above, nothing herein shall supersede or modify\nthe terms of any separate license agreement you may have executed\nwith Licensor regarding such Contributions.\n\n6. Trademarks. This License does not grant permission to use the trade\nnames, trademarks, service marks, or product names of the Licensor,\nexcept as required for reasonable and customary use in describing the\norigin of the Work and reproducing the content of the NOTICE file.\n\n7. Disclaimer of Warranty. 
Unless required by applicable law or\nagreed to in writing, Licensor provides the Work (and each\nContributor provides its Contributions) on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\nimplied, including, without limitation, any warranties or conditions\nof TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\nPARTICULAR PURPOSE. You are solely responsible for determining the\nappropriateness of using or redistributing the Work and assume any\nrisks associated with Your exercise of permissions under this License.\n\n8. Limitation of Liability. In no event and under no legal theory,\nwhether in tort (including negligence), contract, or otherwise,\nunless required by applicable law (such as deliberate and grossly\nnegligent acts) or agreed to in writing, shall any Contributor be\nliable to You for damages, including any direct, indirect, special,\nincidental, or consequential damages of any character arising as a\nresult of this License or out of the use or inability to use the\nWork (including but not limited to damages for loss of goodwill,\nwork stoppage, computer failure or malfunction, or any and all\nother commercial damages or losses), even if such Contributor\nhas been advised of the possibility of such damages.\n\n9. Accepting Warranty or Additional Liability. While redistributing\nthe Work or Derivative Works thereof, You may choose to offer,\nand charge a fee for, acceptance of support, warranty, indemnity,\nor other liability obligations and/or rights consistent with this\nLicense. 
However, in accepting such obligations, You may act only\non Your own behalf and on Your sole responsibility, not on behalf\nof any other Contributor, and only if You agree to indemnify,\ndefend, and hold each Contributor harmless for any liability\nincurred by, or claims asserted against, such Contributor by reason\nof your accepting any such warranty or additional liability.\n\nEND OF TERMS AND CONDITIONS\n\nAPPENDIX: How to apply the Apache License to your work.\n\nTo apply the Apache License to your work, attach the following\nboilerplate notice, with the fields enclosed by brackets \"[]\"\nreplaced with your own identifying information. (Don't include\nthe brackets!)  The text should be enclosed in the appropriate\ncomment syntax for the file format. We also recommend that a\nfile or class name and description of purpose be included on the\nsame \"printed page\" as the copyright notice for easier\nidentification within third-party archives.\n\nCopyright [yyyy] [name of copyright owner]\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\nhttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n"
  },
  {
    "path": "MODEL_ZOO.md",
    "content": "# Detectron Model Zoo and Baselines\n\n## Introduction\n\nThis file documents a large collection of baselines trained with Detectron, primarily in late December 2017. We refer to these results as the *12_2017_baselines*. All configurations for these baselines are located in the `configs/12_2017_baselines` directory. The tables below provide results and useful statistics about training and inference. Links to the trained models as well as their output are provided. Unless noted differently below (see \"Notes\" under each table), the following common settings are used for all training and inference runs.\n\n#### Common Settings and Notes\n\n- All baselines were run on [Big Basin](https://code.facebook.com/posts/1835166200089399/introducing-big-basin) servers with 8 NVIDIA Tesla P100 GPU accelerators (with 16GB GPU memory, CUDA 8.0, and cuDNN 6.0.21).\n- All baselines were trained using 8 GPU data parallel sync SGD with a minibatch size of either 8 or 16 images (see the *im/gpu* column).\n- For training, only horizontal flipping data augmentation was used.\n- For inference, no test-time augmentations (e.g., multiple scales, flipping) were used.\n- All models were trained on the union of `coco_2014_train` and `coco_2014_valminusminival`, which is exactly equivalent to the recently defined `coco_2017_train` dataset.\n- All models were tested on the `coco_2014_minival` dataset, which is exactly equivalent to the recently defined `coco_2017_val` dataset.\n- Inference times are often expressed as \"*X* + *Y*\", in which *X* is time taken in reasonably well-optimized GPU code and *Y* is time taken in unoptimized CPU code. 
(The CPU code time could be reduced substantially with additional engineering.)\n- Inference results for boxes, masks, and keypoints (\"kps\") are provided in the [COCO json format](http://cocodataset.org/#format-data).\n- The *model id* column is provided for ease of reference.\n- To check downloaded file integrity: for any download URL on this page, simply append `.md5sum` to the URL to download the file's md5 hash.\n- All models and results below are on the [COCO dataset](http://cocodataset.org).\n- Baseline models and results for the [Cityscapes dataset](https://www.cityscapes-dataset.com/) are coming soon!\n\n#### Training Schedules\n\nWe use three training schedules, indicated by the *lr schd* column in the tables below.\n\n- **1x**: For minibatch size 16, this schedule starts at a LR of 0.02, which is decreased by a factor of 10 (* 0.1) after 60k and 80k iterations, and finally terminates at 90k iterations. This schedule results in 12.17 epochs over the 118,287 images in `coco_2014_train` union `coco_2014_valminusminival` (or equivalently, `coco_2017_train`).\n- **2x**: Twice as long as the 1x schedule with the LR change points scaled proportionally.\n- **s1x** (\"stretched 1x\"): This schedule scales the 1x schedule by roughly 1.44x, but also extends the duration of the first learning rate. With a minibatch size of 16, it reduces the LR by a factor of 10 (* 0.1) at 100k and 120k iterations, finally ending after 130k iterations.\n\nAll training schedules also use a 500 iteration linear learning rate warm up. 
When changing the minibatch size between 8 and 16 images, we adjust the number of SGD iterations and the base learning rate according to the principles outlined in our paper [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677).\n\n#### License\n\nAll models available for download through this document are licensed under the [Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/).\n\n#### ImageNet Pretrained Models\n\nThe backbone models pretrained on ImageNet are available in the format used by Detectron. Unless otherwise noted, these models are trained on the standard ImageNet-1k dataset.\n\n- [R-50.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl): converted copy of MSRA's original ResNet-50 model\n- [R-101.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl): converted copy of MSRA's original ResNet-101 model\n- [X-101-64x4d.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl): converted copy of FB's original ResNeXt-101-64x4d model trained with Torch7\n- [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB\n- [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl): ResNeXt-152-32x8d model **trained on ImageNet-5k** with Caffe2 at FB (see our [ResNeXt paper](https://arxiv.org/abs/1611.05431) for details on ImageNet-5k)\n\n#### Log Files\n\n[Training and inference logs](https://dl.fbaipublicfiles.com/detectron/logs/model_zoo_12_2017_baseline_logs.tgz) are available for most models in the model zoo.\n\n## Proposal, Box, and Mask Detection Baselines\n\n### RPN Proposal Baselines\n\n<table><tbody>\n<!-- START RPN TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th 
valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>kp<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>prop.<br/>AR</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>4.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.187</sub></sup></td>\n<td align=\"right\"><sup><sub>4.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.113</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>51.6</sub></sup></td>\n<td align=\"right\"><sup><sub>35998355</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/train/coco_2014_train%3Acoco_2014_valminusminival/rpn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl\">2</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>6.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.416</sub></sup></td>\n<td align=\"right\"><sup><sub>10.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.080</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>57.2</sub></sup></td>\n<td align=\"right\"><sup><sub>35998814</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl\">2</a>,&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.503</sub></sup></td>\n<td align=\"right\"><sup><sub>12.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.108</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>58.2</sub></sup></td>\n<td align=\"right\"><sup><sub>35998887</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl\">2</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td 
align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>11.5</sub></sup></td>\n<td align=\"right\"><sup><sub>1.395</sub></sup></td>\n<td align=\"right\"><sup><sub>34.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.292</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>59.4</sub></sup></td>\n<td align=\"right\"><sup><sub>35998956</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl\">2</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>11.6</sub></sup></td>\n<td align=\"right\"><sup><sub>1.102</sub></sup></td>\n<td align=\"right\"><sup><sub>27.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.222</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td 
align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>59.5</sub></sup></td>\n<td align=\"right\"><sup><sub>36760102</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl\">2</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<!-- END RPN TABLE -->\n</tbody></table>\n\n**Notes:**\n\n- Inference time only includes RPN proposal generation.\n- \"prop. 
AR\" is proposal average recall at 1000 proposals per image.\n- Proposal download links (\"props\"): \"1\" is `coco_2014_train`; \"2\" is `coco_2014_valminusminival`; and \"3\" is `coco_2014_minival`.\n\n### Fast & Mask R-CNN Baselines Using Precomputed RPN Proposals\n\n<table><tbody>\n<!-- START 2-STAGE TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>kp<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>prop.<br/>AR</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.456</sub></sup></td>\n<td align=\"right\"><sup><sub>22.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.241&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>34.4</sub></sup></td>\n<td 
align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36224013</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36224013/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml.08_22_00.vHd5BeBP/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36224013/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml.08_22_00.vHd5BeBP/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.453</sub></sup></td>\n<td align=\"right\"><sup><sub>45.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.241&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>35.6</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36224046</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36224046/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml.08_22_57.XFxNqEnL/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36224046/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml.08_22_57.XFxNqEnL/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td 
align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>6.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.285</sub></sup></td>\n<td align=\"right\"><sup><sub>7.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.076&nbsp;+&nbsp;0.004</sub></sup></td>\n<td align=\"right\"><sup><sub>36.4</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36225147</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>6.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.287</sub></sup></td>\n<td align=\"right\"><sup><sub>14.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.077&nbsp;+&nbsp;0.004</sub></sup></td>\n<td align=\"right\"><sup><sub>36.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36225249</sub></sup></td>\n<td align=\"left\"><sup><sub><a 
href=\"https://dl.fbaipublicfiles.com/detectron/36225249/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml.08_40_18.zoChak1f/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36225249/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml.08_40_18.zoChak1f/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>7.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.448</sub></sup></td>\n<td align=\"right\"><sup><sub>11.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.102&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>38.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36228880</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36228880/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml.09_25_03.tZuHkSpl/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36228880/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml.09_25_03.tZuHkSpl/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>7.7</sub></sup></td>\n<td 
align=\"right\"><sup><sub>0.449</sub></sup></td>\n<td align=\"right\"><sup><sub>22.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.103&nbsp;+&nbsp;0.004</sub></sup></td>\n<td align=\"right\"><sup><sub>39.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36228933</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36228933/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml.09_26_27.jkOUTrrk/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36228933/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml.09_26_27.jkOUTrrk/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.994</sub></sup></td>\n<td align=\"right\"><sup><sub>49.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.292&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>40.4</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36226250</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36226250/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml.08_54_22.u0LaxQsC/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/36226250/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml.08_54_22.u0LaxQsC/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.980</sub></sup></td>\n<td align=\"right\"><sup><sub>98.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.291&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>39.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36226326</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36226326/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml.08_55_54.2F7MP1CD/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36226326/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml.08_55_54.2F7MP1CD/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.721</sub></sup></td>\n<td align=\"right\"><sup><sub>36.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.217&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>40.6</sub></sup></td>\n<td 
align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37119777</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37119777/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml.06_38_03.d5N36egm/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37119777/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml.06_38_03.d5N36egm/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Fast</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.720</sub></sup></td>\n<td align=\"right\"><sup><sub>72.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.217&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>39.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37121469</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37121469/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml.07_03_53.EPrHk63L/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37121469/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml.07_03_53.EPrHk63L/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td 
align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.466</sub></sup></td>\n<td align=\"right\"><sup><sub>23.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.252&nbsp;+&nbsp;0.020</sub></sup></td>\n<td align=\"right\"><sup><sub>35.5</sub></sup></td>\n<td align=\"right\"><sup><sub>31.3</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36224121</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36224121/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml.08_24_37.wdU8r5Jo/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36224121/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml.08_24_37.wdU8r5Jo/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36224121/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml.08_24_37.wdU8r5Jo/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.464</sub></sup></td>\n<td align=\"right\"><sup><sub>46.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.253&nbsp;+&nbsp;0.019</sub></sup></td>\n<td align=\"right\"><sup><sub>36.9</sub></sup></td>\n<td align=\"right\"><sup><sub>32.5</sub></sup></td>\n<td 
align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36224151</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36224151/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml.08_25_34.RSN5CVSH/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36224151/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml.08_25_34.RSN5CVSH/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36224151/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml.08_25_34.RSN5CVSH/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>7.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.377</sub></sup></td>\n<td align=\"right\"><sup><sub>9.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.082&nbsp;+&nbsp;0.019</sub></sup></td>\n<td align=\"right\"><sup><sub>37.3</sub></sup></td>\n<td align=\"right\"><sup><sub>33.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36225401</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36225401/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml.08_42_04.MocEgrRW/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/36225401/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml.08_42_04.MocEgrRW/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36225401/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml.08_42_04.MocEgrRW/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>7.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.377</sub></sup></td>\n<td align=\"right\"><sup><sub>18.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.083&nbsp;+&nbsp;0.018</sub></sup></td>\n<td align=\"right\"><sup><sub>37.7</sub></sup></td>\n<td align=\"right\"><sup><sub>34.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36225732</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36225732/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml.08_43_08.gDqBz9zS/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36225732/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml.08_43_08.gDqBz9zS/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36225732/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml.08_43_08.gDqBz9zS/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td 
align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>9.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.539</sub></sup></td>\n<td align=\"right\"><sup><sub>13.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.111&nbsp;+&nbsp;0.018</sub></sup></td>\n<td align=\"right\"><sup><sub>39.4</sub></sup></td>\n<td align=\"right\"><sup><sub>35.6</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36229407</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36229407/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml.09_38_04.zbVPo8ZE/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36229407/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml.09_38_04.zbVPo8ZE/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36229407/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml.09_38_04.zbVPo8ZE/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>9.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.537</sub></sup></td>\n<td align=\"right\"><sup><sub>26.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.109&nbsp;+&nbsp;0.016</sub></sup></td>\n<td align=\"right\"><sup><sub>40.0</sub></sup></td>\n<td 
align=\"right\"><sup><sub>35.9</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36229740</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36229740/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml.09_39_00.Z7O7zOEC/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36229740/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml.09_39_00.Z7O7zOEC/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36229740/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml.09_39_00.Z7O7zOEC/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>7.3</sub></sup></td>\n<td align=\"right\"><sup><sub>1.036</sub></sup></td>\n<td align=\"right\"><sup><sub>51.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.292&nbsp;+&nbsp;0.016</sub></sup></td>\n<td align=\"right\"><sup><sub>41.3</sub></sup></td>\n<td align=\"right\"><sup><sub>37.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36226382</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36226382/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml.08_56_59.rUCejrBN/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/36226382/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml.08_56_59.rUCejrBN/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36226382/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml.08_56_59.rUCejrBN/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>7.3</sub></sup></td>\n<td align=\"right\"><sup><sub>1.035</sub></sup></td>\n<td align=\"right\"><sup><sub>103.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.292&nbsp;+&nbsp;0.014</sub></sup></td>\n<td align=\"right\"><sup><sub>41.1</sub></sup></td>\n<td align=\"right\"><sup><sub>36.6</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36672114</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36672114/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml.08_58_13.aNWCi3U7/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36672114/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml.08_58_13.aNWCi3U7/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36672114/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml.08_58_13.aNWCi3U7/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td 
align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>7.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.766</sub></sup></td>\n<td align=\"right\"><sup><sub>38.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.223&nbsp;+&nbsp;0.017</sub></sup></td>\n<td align=\"right\"><sup><sub>41.3</sub></sup></td>\n<td align=\"right\"><sup><sub>37.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37121516</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37121516/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml.07_04_58.CbM22DZg/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37121516/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml.07_04_58.CbM22DZg/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37121516/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml.07_04_58.CbM22DZg/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>7.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.765</sub></sup></td>\n<td align=\"right\"><sup><sub>76.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.222&nbsp;+&nbsp;0.014</sub></sup></td>\n<td align=\"right\"><sup><sub>40.7</sub></sup></td>\n<td 
align=\"right\"><sup><sub>36.3</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37121596</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37121596/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml.07_05_48.TL22uFaK/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37121596/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml.07_05_48.TL22uFaK/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37121596/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml.07_05_48.TL22uFaK/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<!-- END 2-STAGE TABLE -->\n</tbody></table>\n\n**Notes:**\n\n- Each row uses precomputed RPN proposals from the corresponding table row above that uses the same backbone.\n- Inference time *excludes* proposal generation.\n\n### End-to-End Faster & Mask R-CNN Baselines\n\n<table><tbody>\n<!-- START E2E FASTER AND MASK TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th 
valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>kp<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>prop.<br/>AR</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.566</sub></sup></td>\n<td align=\"right\"><sup><sub>28.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.167&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>34.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35857197</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.3</sub></sup></td>\n<td 
align=\"right\"><sup><sub>0.569</sub></sup></td>\n<td align=\"right\"><sup><sub>56.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.174&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>36.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35857281</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35857281/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml.01_34_56.ScPH0Z4r/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35857281/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml.01_34_56.ScPH0Z4r/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>7.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.544</sub></sup></td>\n<td align=\"right\"><sup><sub>13.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.093&nbsp;+&nbsp;0.004</sub></sup></td>\n<td align=\"right\"><sup><sub>36.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35857345</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>7.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.546</sub></sup></td>\n<td align=\"right\"><sup><sub>27.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.092&nbsp;+&nbsp;0.004</sub></sup></td>\n<td align=\"right\"><sup><sub>37.9</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35857389</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35857389/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml.01_37_22.KSeq0b5q/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35857389/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml.01_37_22.KSeq0b5q/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.647</sub></sup></td>\n<td align=\"right\"><sup><sub>16.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.120&nbsp;+&nbsp;0.004</sub></sup></td>\n<td align=\"right\"><sup><sub>39.4</sub></sup></td>\n<td 
align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35857890</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.647</sub></sup></td>\n<td align=\"right\"><sup><sub>32.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.119&nbsp;+&nbsp;0.004</sub></sup></td>\n<td align=\"right\"><sup><sub>39.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35857952</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35857952/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml.01_39_49.JPwJDh92/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35857952/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml.01_39_49.JPwJDh92/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td 
align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.9</sub></sup></td>\n<td align=\"right\"><sup><sub>1.057</sub></sup></td>\n<td align=\"right\"><sup><sub>52.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.305&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>41.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35858015</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35858015/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml.01_40_54.1xc565DE/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35858015/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml.01_40_54.1xc565DE/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.9</sub></sup></td>\n<td align=\"right\"><sup><sub>1.055</sub></sup></td>\n<td align=\"right\"><sup><sub>105.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.304&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>40.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35858198</sub></sup></td>\n<td align=\"left\"><sup><sub><a 
href=\"https://dl.fbaipublicfiles.com/detectron/35858198/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml.01_41_46.CX2InaoG/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35858198/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml.01_41_46.CX2InaoG/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>7.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.799</sub></sup></td>\n<td align=\"right\"><sup><sub>40.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.233&nbsp;+&nbsp;0.004</sub></sup></td>\n<td align=\"right\"><sup><sub>41.3</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36761737</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Faster</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td 
align=\"right\"><sup><sub>7.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.800</sub></sup></td>\n<td align=\"right\"><sup><sub>80.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.233&nbsp;+&nbsp;0.003</sub></sup></td>\n<td align=\"right\"><sup><sub>40.6</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36761786</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36761786/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml.06_33_22.VqFNuxk6/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36761786/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml.06_33_22.VqFNuxk6/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.620</sub></sup></td>\n<td align=\"right\"><sup><sub>31.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.181&nbsp;+&nbsp;0.018</sub></sup></td>\n<td align=\"right\"><sup><sub>35.8</sub></sup></td>\n<td align=\"right\"><sup><sub>31.4</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35858791</sub></sup></td>\n<td align=\"left\"><sup><sub><a 
href=\"https://dl.fbaipublicfiles.com/detectron/35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-C4</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>6.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.620</sub></sup></td>\n<td align=\"right\"><sup><sub>62.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.182&nbsp;+&nbsp;0.017</sub></sup></td>\n<td align=\"right\"><sup><sub>37.8</sub></sup></td>\n<td align=\"right\"><sup><sub>32.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35858828</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35858828/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml.01_46_47.HBThTerB/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35858828/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml.01_46_47.HBThTerB/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/35858828/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml.01_46_47.HBThTerB/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.889</sub></sup></td>\n<td align=\"right\"><sup><sub>22.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.099&nbsp;+&nbsp;0.019</sub></sup></td>\n<td align=\"right\"><sup><sub>37.7</sub></sup></td>\n<td align=\"right\"><sup><sub>33.9</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35858933</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.6</sub></sup></td>\n<td 
align=\"right\"><sup><sub>0.897</sub></sup></td>\n<td align=\"right\"><sup><sub>44.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.099&nbsp;+&nbsp;0.018</sub></sup></td>\n<td align=\"right\"><sup><sub>38.6</sub></sup></td>\n<td align=\"right\"><sup><sub>34.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35859007</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>10.2</sub></sup></td>\n<td align=\"right\"><sup><sub>1.008</sub></sup></td>\n<td align=\"right\"><sup><sub>25.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.126&nbsp;+&nbsp;0.018</sub></sup></td>\n<td align=\"right\"><sup><sub>40.0</sub></sup></td>\n<td align=\"right\"><sup><sub>35.9</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35861795</sub></sup></td>\n<td align=\"left\"><sup><sub><a 
href=\"https://dl.fbaipublicfiles.com/detectron/35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>10.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.993</sub></sup></td>\n<td align=\"right\"><sup><sub>49.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.126&nbsp;+&nbsp;0.017</sub></sup></td>\n<td align=\"right\"><sup><sub>40.9</sub></sup></td>\n<td align=\"right\"><sup><sub>36.4</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35861858</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>7.6</sub></sup></td>\n<td align=\"right\"><sup><sub>1.217</sub></sup></td>\n<td align=\"right\"><sup><sub>60.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.309&nbsp;+&nbsp;0.018</sub></sup></td>\n<td align=\"right\"><sup><sub>42.4</sub></sup></td>\n<td align=\"right\"><sup><sub>37.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36494496</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36494496/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml.07_50_11.fkwVtEvg/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36494496/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml.07_50_11.fkwVtEvg/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36494496/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml.07_50_11.fkwVtEvg/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td 
align=\"right\"><sup><sub>7.6</sub></sup></td>\n<td align=\"right\"><sup><sub>1.210</sub></sup></td>\n<td align=\"right\"><sup><sub>121.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.309&nbsp;+&nbsp;0.015</sub></sup></td>\n<td align=\"right\"><sup><sub>42.2</sub></sup></td>\n<td align=\"right\"><sup><sub>37.2</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>35859745</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35859745/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml.02_00_30.ESWbND2w/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35859745/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml.02_00_30.ESWbND2w/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35859745/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml.02_00_30.ESWbND2w/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>7.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.961</sub></sup></td>\n<td align=\"right\"><sup><sub>48.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.239&nbsp;+&nbsp;0.019</sub></sup></td>\n<td align=\"right\"><sup><sub>42.1</sub></sup></td>\n<td align=\"right\"><sup><sub>37.3</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36761843</sub></sup></td>\n<td 
align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>7.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.975</sub></sup></td>\n<td align=\"right\"><sup><sub>97.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.240&nbsp;+&nbsp;0.016</sub></sup></td>\n<td align=\"right\"><sup><sub>41.7</sub></sup></td>\n<td align=\"right\"><sup><sub>36.9</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36762092</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36762092/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml.06_37_59.DM5gJYRF/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/36762092/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml.06_37_59.DM5gJYRF/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36762092/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml.06_37_59.DM5gJYRF/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<!-- END E2E FASTER AND MASK TABLE -->\n</tbody></table>\n\n**Notes:**\n\n- For these models, RPN and the detector are trained jointly and end-to-end.\n- Inference time is fully image-to-detections, *including* proposal generation.\n\n\n### RetinaNet Baselines\n\n<table><tbody>\n<!-- START RETINANET TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>kp<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>prop.<br/>AR</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td 
align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RetinaNet</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>6.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.483</sub></sup></td>\n<td align=\"right\"><sup><sub>12.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.125</sub></sup></td>\n<td align=\"right\"><sup><sub>35.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36768636</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36768636/12_2017_baselines/retinanet_R-50-FPN_1x.yaml.08_29_48.t4zc9clc/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36768636/12_2017_baselines/retinanet_R-50-FPN_1x.yaml.08_29_48.t4zc9clc/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RetinaNet</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>6.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.482</sub></sup></td>\n<td align=\"right\"><sup><sub>24.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.127</sub></sup></td>\n<td align=\"right\"><sup><sub>35.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36768677</sub></sup></td>\n<td align=\"left\"><sup><sub><a 
href=\"https://dl.fbaipublicfiles.com/detectron/36768677/12_2017_baselines/retinanet_R-50-FPN_2x.yaml.08_30_38.sgZIQZQ5/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36768677/12_2017_baselines/retinanet_R-50-FPN_2x.yaml.08_30_38.sgZIQZQ5/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RetinaNet</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.666</sub></sup></td>\n<td align=\"right\"><sup><sub>16.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.156</sub></sup></td>\n<td align=\"right\"><sup><sub>37.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36768744</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36768744/12_2017_baselines/retinanet_R-101-FPN_1x.yaml.08_31_38.5poQe1ZB/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36768744/12_2017_baselines/retinanet_R-101-FPN_1x.yaml.08_31_38.5poQe1ZB/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RetinaNet</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.666</sub></sup></td>\n<td 
align=\"right\"><sup><sub>33.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.154</sub></sup></td>\n<td align=\"right\"><sup><sub>37.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36768840</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36768840/12_2017_baselines/retinanet_R-101-FPN_2x.yaml.08_33_29.grtM0RTf/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36768840/12_2017_baselines/retinanet_R-101-FPN_2x.yaml.08_33_29.grtM0RTf/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RetinaNet</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.6</sub></sup></td>\n<td align=\"right\"><sup><sub>1.613</sub></sup></td>\n<td align=\"right\"><sup><sub>40.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.341</sub></sup></td>\n<td align=\"right\"><sup><sub>39.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36768875</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36768875/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml.08_34_37.FSXgMpzP/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl\">model</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/36768875/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml.08_34_37.FSXgMpzP/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RetinaNet</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.6</sub></sup></td>\n<td align=\"right\"><sup><sub>1.625</sub></sup></td>\n<td align=\"right\"><sup><sub>81.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.339</sub></sup></td>\n<td align=\"right\"><sup><sub>39.2</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36768907</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36768907/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml.08_35_40.pF3nzPpu/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36768907/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml.08_35_40.pF3nzPpu/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RetinaNet</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.7</sub></sup></td>\n<td align=\"right\"><sup><sub>1.343</sub></sup></td>\n<td align=\"right\"><sup><sub>33.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.277</sub></sup></td>\n<td align=\"right\"><sup><sub>39.5</sub></sup></td>\n<td 
align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36769563</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36769563/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml.08_42_05.06JTK6vJ/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36769563/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml.08_42_05.06JTK6vJ/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RetinaNet</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.7</sub></sup></td>\n<td align=\"right\"><sup><sub>1.340</sub></sup></td>\n<td align=\"right\"><sup><sub>67.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.276</sub></sup></td>\n<td align=\"right\"><sup><sub>38.6</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>36769641</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/36769641/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml.08_42_55.sUPnwXI5/output/train/coco_2014_train%3Acoco_2014_valminusminival/retinanet/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36769641/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml.08_42_55.sUPnwXI5/output/test/coco_2014_minival/retinanet/detections_coco_2014_minival_results.json\">boxes</a></sub></sup></td>\n</tr>\n<!-- END RETINANET TABLE -->\n</tbody></table>\n\n**Notes:** 
none\n\n### Mask R-CNN with Bells & Whistles\n\n<table><tbody>\n<!-- START BELLS TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>kp<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>prop.<br/>AR</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>X-152-32x8d-FPN-IN5k</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask</sub></sup></td>\n<td align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>1</sub></sup></td>\n<td align=\"right\"><sup><sub>9.6</sub></sup></td>\n<td align=\"right\"><sup><sub>1.188</sub></sup></td>\n<td align=\"right\"><sup><sub>85.8</sub></sup></td>\n<td align=\"right\"><sup><sub>12.100&nbsp;+&nbsp;0.046</sub></sup></td>\n<td align=\"right\"><sup><sub>48.1</sub></sup></td>\n<td align=\"right\"><sup><sub>41.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37129812</sub></sup></td>\n<td align=\"left\"><sup><sub><a 
href=\"https://dl.fbaipublicfiles.com/detectron/37129812/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml.09_35_36.8pzTQKYK/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37129812/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml.09_35_36.8pzTQKYK/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37129812/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml.09_35_36.8pzTQKYK/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>[above without test-time aug.]</sub></sup></td>\n<td align=\"right\"><sup><sub></sub></sup></td>\n<td align=\"right\"><sup><sub></sub></sup></td>\n<td align=\"right\"><sup><sub></sub></sup></td>\n<td align=\"right\"><sup><sub></sub></sup></td>\n<td align=\"right\"><sup><sub></sub></sup></td>\n<td align=\"right\"><sup><sub></sub></sup></td>\n<td align=\"right\"><sup><sub>0.325&nbsp;+&nbsp;0.018</sub></sup></td>\n<td align=\"right\"><sup><sub>45.2</sub></sup></td>\n<td align=\"right\"><sup><sub>39.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub></sub></sup></td>\n<td align=\"right\"><sup><sub></sub></sup></td>\n</tr>\n<!-- END BELLS TABLE -->\n</tbody></table>\n\n**Notes:**\n\n- A deeper backbone architecture is used: ResNeXt-**152**-32x8d-FPN\n- The backbone ResNeXt-152-32x8d model was trained on ImageNet-**5k** (not the usual ImageNet-1k)\n- Training uses multi-scale jitter over scales {640, 672, 704, 736, 768, 800}\n- Row 1: test-time augmentations are multi-scale testing over {400, 500, 600, 700, 900, 1000, 1100, 1200} and horizontal flipping (on each scale)\n- Row 2: same 
model as row 1, but without any test-time augmentation (i.e., same as the common baseline configuration)\n- Like the other results, this is a single model result (it is not an ensemble of models)\n\n## Keypoint Detection Baselines\n\n#### Common Settings for Keypoint Detection Baselines (That Differ from Boxes and Masks)\n\nOur keypoint detection baselines differ from our box and mask baselines in a couple of details:\n\n- Due to less training data for the keypoint detection task compared with boxes and masks, we enable multi-scale jitter during training for all keypoint detection models. (Testing is still without any test-time augmentations by default.)\n- Models are trained only on images from `coco_2014_train` union `coco_2014_valminusminival` that contain at least one person with keypoint annotations (all other images are discarded from the training set).\n- Metrics are reported for the person class only (still run on the entire `coco_2014_minival` dataset).\n\n### Person-Specific RPN Baselines\n\n<table><tbody>\n<!-- START PERSON-ONLY RPN TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>kp AP</sub></sup></th>\n<th 
valign=\"bottom\"><sup><sub>prop. AR</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>6.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.391</sub></sup></td>\n<td align=\"right\"><sup><sub>9.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.082</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>64.0</sub></sup></td>\n<td align=\"right\"><sup><sub>35998996</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl\">2</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td 
align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.504</sub></sup></td>\n<td align=\"right\"><sup><sub>12.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.109</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>65.2</sub></sup></td>\n<td align=\"right\"><sup><sub>35999521</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl\">2</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>11.5</sub></sup></td>\n<td align=\"right\"><sup><sub>1.394</sub></sup></td>\n<td align=\"right\"><sup><sub>34.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.289</sub></sup></td>\n<td 
align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>65.9</sub></sup></td>\n<td align=\"right\"><sup><sub>35999553</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl\">2</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>RPN</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>11.6</sub></sup></td>\n<td align=\"right\"><sup><sub>1.104</sub></sup></td>\n<td align=\"right\"><sup><sub>27.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.224</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>66.2</sub></sup></td>\n<td align=\"right\"><sup><sub>36760438</sub></sup></td>\n<td align=\"left\"><sup><sub><a 
href=\"https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;props:&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl\">1</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl\">2</a>,&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl\">3</a></sub></sup></td>\n</tr>\n<!-- END PERSON-ONLY RPN TABLE -->\n</tbody></table>\n\n**Notes:**\n\n- *Metrics are for the person category only.*\n- Inference time only includes RPN proposal generation.\n- \"prop. AR\" is proposal average recall at 1000 proposals per image.\n- Proposal download links (\"props\"): \"1\" is `coco_2014_train`; \"2\" is `coco_2014_valminusminival`; and \"3\" is `coco_2014_minival`. 
These include all images, not just the ones with valid keypoint annotations.\n\n### Keypoint-Only Mask R-CNN Baselines Using Precomputed RPN Proposals\n\n<table><tbody>\n<!-- START 2-STAGE KEYPOINTS TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>kp AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>prop. 
AR</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>7.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.533</sub></sup></td>\n<td align=\"right\"><sup><sub>13.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.081&nbsp;+&nbsp;0.087</sub></sup></td>\n<td align=\"right\"><sup><sub>52.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>64.1</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37651787</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37651787/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml.20_00_48.UiwJsTXB/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/gene\nralized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37651787/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml.20_00_48.UiwJsTXB/output/test/keypoints_coco_2014_minival/generalized_rcnn\n/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37651787/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml.20_00_48.UiwJsTXB/output/test/keypoints_coco_2014_miniva\nl/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>7.7</sub></sup></td>\n<td 
align=\"right\"><sup><sub>0.533</sub></sup></td>\n<td align=\"right\"><sup><sub>19.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.080&nbsp;+&nbsp;0.085</sub></sup></td>\n<td align=\"right\"><sup><sub>53.4</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>65.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37651887</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/gen\neralized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/test/keypoints_coco_2014_minival/generalized_rc\nnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/test/keypoints_coco_2014_min\nival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>9.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.668</sub></sup></td>\n<td align=\"right\"><sup><sub>16.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.109&nbsp;+&nbsp;0.080</sub></sup></td>\n<td align=\"right\"><sup><sub>53.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>65.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37651996</sub></sup></td>\n<td 
align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37651996/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml.20_02_37.eVXnKM2Q/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/gen\neralized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37651996/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml.20_02_37.eVXnKM2Q/output/test/keypoints_coco_2014_minival/generalized_rc\nnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37651996/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml.20_02_37.eVXnKM2Q/output/test/keypoints_coco_2014_min\nival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>9.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.668</sub></sup></td>\n<td align=\"right\"><sup><sub>24.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.108&nbsp;+&nbsp;0.076</sub></sup></td>\n<td align=\"right\"><sup><sub>54.6</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>66.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37652016</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37652016/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml.20_03_32.z86wT97d/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/ge\nneralized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/37652016/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml.20_03_32.z86wT97d/output/test/keypoints_coco_2014_minival/generalized_\nrcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37652016/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml.20_03_32.z86wT97d/output/test/keypoints_coco_2014_\nminival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.8</sub></sup></td>\n<td align=\"right\"><sup><sub>1.477</sub></sup></td>\n<td align=\"right\"><sup><sub>36.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.288&nbsp;+&nbsp;0.077</sub></sup></td>\n<td align=\"right\"><sup><sub>55.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>66.7</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37731079</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37731079/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_40_56.wj7Hg7lX/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminiv\nal/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37731079/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_40_56.wj7Hg7lX/output/test/keypoints_coco_2014_minival/ge\nneralized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/37731079/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_40_56.wj7Hg7lX/output/test/keypo\nints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.9</sub></sup></td>\n<td align=\"right\"><sup><sub>1.478</sub></sup></td>\n<td align=\"right\"><sup><sub>53.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.286&nbsp;+&nbsp;0.075</sub></sup></td>\n<td align=\"right\"><sup><sub>56.3</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>67.1</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37731142</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37731142/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_41_54.e1sD4Frh/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusmini\nval/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37731142/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_41_54.e1sD4Frh/output/test/keypoints_coco_2014_minival/\ngeneralized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37731142/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_41_54.e1sD4Frh/output/test/ke\nypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td 
align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.9</sub></sup></td>\n<td align=\"right\"><sup><sub>1.215</sub></sup></td>\n<td align=\"right\"><sup><sub>30.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.219&nbsp;+&nbsp;0.084</sub></sup></td>\n<td align=\"right\"><sup><sub>55.4</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>66.2</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37730253</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37730253/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_34_24.3G9OcQuR/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminiv\nal/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37730253/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_34_24.3G9OcQuR/output/test/keypoints_coco_2014_minival/ge\nneralized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37730253/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_34_24.3G9OcQuR/output/test/keypo\nints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.9</sub></sup></td>\n<td align=\"right\"><sup><sub>1.214</sub></sup></td>\n<td align=\"right\"><sup><sub>43.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.218&nbsp;+&nbsp;0.071</sub></sup></td>\n<td align=\"right\"><sup><sub>55.9</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td 
align=\"right\"><sup><sub>67.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37731010</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37731010/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_39_51.xt1oMzRk/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusmini\nval/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37731010/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_39_51.xt1oMzRk/output/test/keypoints_coco_2014_minival/\ngeneralized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37731010/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_39_51.xt1oMzRk/output/test/ke\nypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<!-- END 2-STAGE KEYPOINTS TABLE -->\n</tbody></table>\n\n**Notes:**\n\n- *Metrics are for the person category only.*\n- Each row uses precomputed RPN proposals from the corresponding table row above that uses the same backbone.\n- Inference time *excludes* proposal generation.\n\n\n### End-to-End Keypoint-Only Mask R-CNN Baselines\n\n<table><tbody>\n<!-- START END-TO-END KEYPOINTS TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;backbone&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th 
valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>kp AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>prop. AR</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>9.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.832</sub></sup></td>\n<td align=\"right\"><sup><sub>20.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.097&nbsp;+&nbsp;0.092</sub></sup></td>\n<td align=\"right\"><sup><sub>53.6</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>64.2</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37697547</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>9.0</sub></sup></td>\n<td align=\"right\"><sup><sub>0.828</sub></sup></td>\n<td align=\"right\"><sup><sub>29.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.096&nbsp;+&nbsp;0.089</sub></sup></td>\n<td align=\"right\"><sup><sub>54.3</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>65.4</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37697714</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37697714/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml.08_44_03.qrQ0ph6M/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37697714/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml.08_44_03.qrQ0ph6M/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37697714/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml.08_44_03.qrQ0ph6M/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td 
align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>10.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.923</sub></sup></td>\n<td align=\"right\"><sup><sub>23.1</sub></sup></td>\n<td align=\"right\"><sup><sub>0.124&nbsp;+&nbsp;0.084</sub></sup></td>\n<td align=\"right\"><sup><sub>54.5</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>64.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37697946</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37697946/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml.08_45_06.Y14KqbST/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37697946/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml.08_45_06.Y14KqbST/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37697946/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml.08_45_06.Y14KqbST/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>10.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.921</sub></sup></td>\n<td align=\"right\"><sup><sub>33.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.123&nbsp;+&nbsp;0.083</sub></sup></td>\n<td align=\"right\"><sup><sub>55.3</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>65.8</sub></sup></td>\n<td 
align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37698009</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>14.1</sub></sup></td>\n<td align=\"right\"><sup><sub>1.655</sub></sup></td>\n<td align=\"right\"><sup><sub>41.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.302&nbsp;+&nbsp;0.079</sub></sup></td>\n<td align=\"right\"><sup><sub>56.3</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>66.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37732355</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37732355/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_56_16.yv4t4W8N/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/37732355/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_56_16.yv4t4W8N/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37732355/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml.16_56_16.yv4t4W8N/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-64x4d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>14.1</sub></sup></td>\n<td align=\"right\"><sup><sub>1.731</sub></sup></td>\n<td align=\"right\"><sup><sub>62.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.322&nbsp;+&nbsp;0.074</sub></sup></td>\n<td align=\"right\"><sup><sub>56.9</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>66.8</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37732415</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37732415/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_57_48.Spqtq3Sf/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37732415/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_57_48.Spqtq3Sf/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a 
href=\"https://dl.fbaipublicfiles.com/detectron/37732415/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml.16_57_48.Spqtq3Sf/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td align=\"left\"><sup><sub>1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>14.2</sub></sup></td>\n<td align=\"right\"><sup><sub>1.410</sub></sup></td>\n<td align=\"right\"><sup><sub>35.3</sub></sup></td>\n<td align=\"right\"><sup><sub>0.235&nbsp;+&nbsp;0.080</sub></sup></td>\n<td align=\"right\"><sup><sub>56.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>66.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37792158</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37792158/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_54_16.LgZeo40k/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37792158/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_54_16.LgZeo40k/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37792158/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml.16_54_16.LgZeo40k/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>X-101-32x8d-FPN</sub></sup></td>\n<td align=\"left\"><sup><sub>Kps</sub></sup></td>\n<td 
align=\"left\"><sup><sub>s1x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>14.2</sub></sup></td>\n<td align=\"right\"><sup><sub>1.408</sub></sup></td>\n<td align=\"right\"><sup><sub>50.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.236&nbsp;+&nbsp;0.075</sub></sup></td>\n<td align=\"right\"><sup><sub>56.9</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>67.0</sub></sup></td>\n<td align=\"right\"><sup><sub>-</sub></sup></td>\n<td align=\"right\"><sup><sub>37732318</sub></sup></td>\n<td align=\"left\"><sup><sub><a href=\"https://dl.fbaipublicfiles.com/detectron/37732318/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_55_09.Lx8H5JVu/output/train/keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37732318/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_55_09.Lx8H5JVu/output/test/keypoints_coco_2014_minival/generalized_rcnn/bbox_keypoints_coco_2014_minival_results.json\">boxes</a>&nbsp;|&nbsp;<a href=\"https://dl.fbaipublicfiles.com/detectron/37732318/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml.16_55_09.Lx8H5JVu/output/test/keypoints_coco_2014_minival/generalized_rcnn/keypoints_keypoints_coco_2014_minival_results.json\">kps</a></sub></sup></td>\n</tr>\n<!-- END END-TO-END KEYPOINTS TABLE -->\n</tbody></table>\n\n**Notes:**\n\n- *Metrics are for the person category only.*\n- For these models, RPN and the detector are trained jointly and end-to-end.\n- Inference time is fully image-to-detections, *including* proposal generation.\n"
  },
  {
    "path": "Makefile",
    "content": "# Don't use the --user flag for setup.py develop mode with virtualenv.\nDEV_USER_FLAG=$(shell python -c \"import sys; print('' if hasattr(sys, 'real_prefix') else '--user')\")\n\n.PHONY: default\ndefault: dev\n\n.PHONY: install\ninstall:\n\tpython setup.py install\n\n.PHONY: ops\nops:\n\tmkdir -p build && cd build && cmake .. && make -j$(shell nproc)\n\n.PHONY: dev\ndev:\n\tpython setup.py develop $(DEV_USER_FLAG)\n\n.PHONY: clean\nclean:\n\tpython setup.py develop --uninstall $(DEV_USER_FLAG)\n\trm -rf build\n"
  },
  {
    "path": "NOTICE",
    "content": "Portions of this software are derived from py-faster-rcnn.\n\n==============================================================================\npy-faster-rcnn licence\n==============================================================================\n\nFaster R-CNN\n\nThe MIT License (MIT)\n\nCopyright (c) 2015 Microsoft Corporation\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "**Detectron is deprecated. Please see [detectron2](https://github.com/facebookresearch/detectron2), a ground-up rewrite of Detectron in PyTorch.**\n\n# Detectron\n\nDetectron is Facebook AI Research's software system that implements state-of-the-art object detection algorithms, including [Mask R-CNN](https://arxiv.org/abs/1703.06870). It is written in Python and powered by the [Caffe2](https://github.com/caffe2/caffe2) deep learning framework.\n\nAt FAIR, Detectron has enabled numerous research projects, including: [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144), [Mask R-CNN](https://arxiv.org/abs/1703.06870), [Detecting and Recognizing Human-Object Interactions](https://arxiv.org/abs/1704.07333), [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002), [Non-local Neural Networks](https://arxiv.org/abs/1711.07971), [Learning to Segment Every Thing](https://arxiv.org/abs/1711.10370), [Data Distillation: Towards Omni-Supervised Learning](https://arxiv.org/abs/1712.04440), [DensePose: Dense Human Pose Estimation In The Wild](https://arxiv.org/abs/1802.00434), and [Group Normalization](https://arxiv.org/abs/1803.08494).\n\n<div align=\"center\">\n  <img src=\"demo/output/33823288584_1d21cf0a26_k_example_output.jpg\" width=\"700px\" />\n  <p>Example Mask R-CNN output.</p>\n</div>\n\n## Introduction\n\nThe goal of Detectron is to provide a high-quality, high-performance\ncodebase for object detection *research*. It is designed to be flexible in order\nto support rapid implementation and evaluation of novel research. 
Detectron\nincludes implementations of the following object detection algorithms:\n\n- [Mask R-CNN](https://arxiv.org/abs/1703.06870) -- *Marr Prize at ICCV 2017*\n- [RetinaNet](https://arxiv.org/abs/1708.02002) -- *Best Student Paper Award at ICCV 2017*\n- [Faster R-CNN](https://arxiv.org/abs/1506.01497)\n- [RPN](https://arxiv.org/abs/1506.01497)\n- [Fast R-CNN](https://arxiv.org/abs/1504.08083)\n- [R-FCN](https://arxiv.org/abs/1605.06409)\n\nusing the following backbone network architectures:\n\n- [ResNeXt{50,101,152}](https://arxiv.org/abs/1611.05431)\n- [ResNet{50,101,152}](https://arxiv.org/abs/1512.03385)\n- [Feature Pyramid Networks](https://arxiv.org/abs/1612.03144) (with ResNet/ResNeXt)\n- [VGG16](https://arxiv.org/abs/1409.1556)\n\nAdditional backbone architectures may be easily implemented. For more details about these models, please see [References](#references) below.\n\n## Update\n\n- 4/2018: Support Group Normalization - see [`GN/README.md`](./projects/GN/README.md)\n\n## License\n\nDetectron is released under the [Apache 2.0 license](https://github.com/facebookresearch/detectron/blob/master/LICENSE). 
See the [NOTICE](https://github.com/facebookresearch/detectron/blob/master/NOTICE) file for additional details.\n\n## Citing Detectron\n\nIf you use Detectron in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry.\n\n```\n@misc{Detectron2018,\n  author =       {Ross Girshick and Ilija Radosavovic and Georgia Gkioxari and\n                  Piotr Doll\\'{a}r and Kaiming He},\n  title =        {Detectron},\n  howpublished = {\\url{https://github.com/facebookresearch/detectron}},\n  year =         {2018}\n}\n```\n\n## Model Zoo and Baselines\n\nWe provide a large set of baseline results and trained models available for download in the [Detectron Model Zoo](MODEL_ZOO.md).\n\n## Installation\n\nPlease find installation instructions for Caffe2 and Detectron in [`INSTALL.md`](INSTALL.md).\n\n## Quick Start: Using Detectron\n\nAfter installation, please see [`GETTING_STARTED.md`](GETTING_STARTED.md) for brief tutorials covering inference and training with Detectron.\n\n## Getting Help\n\nTo start, please check the [troubleshooting](INSTALL.md#troubleshooting) section of our installation instructions as well as our [FAQ](FAQ.md). If you couldn't find help there, try searching our GitHub issues. We intend the issues page to be a forum in which the community collectively troubleshoots problems.\n\nIf bugs are found, **we appreciate pull requests** (including adding Q&A's to `FAQ.md` and improving our installation instructions and troubleshooting documents). Please see [CONTRIBUTING.md](CONTRIBUTING.md) for more information about contributing to Detectron.\n\n## References\n\n- [Data Distillation: Towards Omni-Supervised Learning](https://arxiv.org/abs/1712.04440).\n  Ilija Radosavovic, Piotr Dollár, Ross Girshick, Georgia Gkioxari, and Kaiming He.\n  Tech report, arXiv, Dec. 
2017.\n- [Learning to Segment Every Thing](https://arxiv.org/abs/1711.10370).\n  Ronghang Hu, Piotr Dollár, Kaiming He, Trevor Darrell, and Ross Girshick.\n  Tech report, arXiv, Nov. 2017.\n- [Non-Local Neural Networks](https://arxiv.org/abs/1711.07971).\n  Xiaolong Wang, Ross Girshick, Abhinav Gupta, and Kaiming He.\n  Tech report, arXiv, Nov. 2017.\n- [Mask R-CNN](https://arxiv.org/abs/1703.06870).\n  Kaiming He, Georgia Gkioxari, Piotr Dollár, and Ross Girshick.\n  IEEE International Conference on Computer Vision (ICCV), 2017.\n- [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002).\n  Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, and Piotr Dollár.\n  IEEE International Conference on Computer Vision (ICCV), 2017.\n- [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677).\n  Priya Goyal, Piotr Dollár, Ross Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, and Kaiming He.\n  Tech report, arXiv, June 2017.\n- [Detecting and Recognizing Human-Object Interactions](https://arxiv.org/abs/1704.07333).\n  Georgia Gkioxari, Ross Girshick, Piotr Dollár, and Kaiming He.\n  Tech report, arXiv, Apr. 
2017.\n- [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144).\n  Tsung-Yi Lin, Piotr Dollár, Ross Girshick, Kaiming He, Bharath Hariharan, and Serge Belongie.\n  IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017.\n- [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431).\n  Saining Xie, Ross Girshick, Piotr Dollár, Zhuowen Tu, and Kaiming He.\n  IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017.\n- [R-FCN: Object Detection via Region-based Fully Convolutional Networks](http://arxiv.org/abs/1605.06409).\n  Jifeng Dai, Yi Li, Kaiming He, and Jian Sun.\n  Conference on Neural Information Processing Systems (NIPS), 2016.\n- [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385).\n  Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun.\n  IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016.\n- [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](http://arxiv.org/abs/1506.01497)\n  Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.\n  Conference on Neural Information Processing Systems (NIPS), 2015.\n- [Fast R-CNN](http://arxiv.org/abs/1504.08083).\n  Ross Girshick.\n  IEEE International Conference on Computer Vision (ICCV), 2015.\n"
  },
  {
    "path": "cmake/Summary.cmake",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake\n\n# Prints configuration summary.\nfunction (detectron_print_config_summary)\n  message(STATUS \"Summary:\")\n  message(STATUS \"  CMake version        : ${CMAKE_VERSION}\")\n  message(STATUS \"  CMake command        : ${CMAKE_COMMAND}\")\n  message(STATUS \"  System name          : ${CMAKE_SYSTEM_NAME}\")\n  message(STATUS \"  C++ compiler         : ${CMAKE_CXX_COMPILER}\")\n  message(STATUS \"  C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}\")\n  message(STATUS \"  CXX flags            : ${CMAKE_CXX_FLAGS}\")\n  message(STATUS \"  Caffe2 version       : ${CAFFE2_VERSION}\")\n  message(STATUS \"  Caffe2 include path  : ${CAFFE2_INCLUDE_DIRS}\")\n  if (CAFFE2_USE_CUDA OR CAFFE2_FOUND_CUDA)\n    message(STATUS \"  Caffe2 found CUDA    : True\")\n    message(STATUS \"    CUDA version       : ${CUDA_VERSION}\")\n    message(STATUS \"    CuDNN version      : ${CUDNN_VERSION}\")\n  else()\n    message(STATUS \"  Caffe2 found CUDA    : False\")\n  endif()\nendfunction()\n"
  },
  {
    "path": "cmake/legacy/Cuda.cmake",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Cuda.cmake\n\n# Caffe2 cmake utility to prepare for cuda build.\n# This cmake file is called from Dependencies.cmake. You do not need to\n# manually invoke it.\n\n# Known NVIDIA GPU achitectures Caffe2 can be compiled for.\n# Default is set to cuda 9. 
If we detect the cuda architectures to be less than\n# 9, we will lower it to the corresponding known archs.\nset(Caffe2_known_gpu_archs \"30 35 50 52 60 61 70\") # for CUDA 9.x\nset(Caffe2_known_gpu_archs8 \"20 21(20) 30 35 50 52 60 61\") # for CUDA 8.x\nset(Caffe2_known_gpu_archs7 \"20 21(20) 30 35 50 52\") # for CUDA 7.x\n\n\n################################################################################################\n# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME\n# Usage:\n#   caffe2_select_nvcc_arch_flags(out_variable)\nfunction(caffe2_select_nvcc_arch_flags out_variable)\n  # List of arch names\n  set(__archs_names \"Kepler\" \"Maxwell\" \"Pascal\" \"Volta\" \"All\" \"Manual\")\n  set(__archs_name_default \"All\")\n\n  # Set CUDA_ARCH_NAME strings (so it will be seen as a drop-down list in the CMake GUI)\n  set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING \"Select target NVIDIA GPU architecture\")\n  set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS \"\" ${__archs_names})\n  mark_as_advanced(CUDA_ARCH_NAME)\n\n  # Verify CUDA_ARCH_NAME value\n  if(NOT \";${__archs_names};\" MATCHES \";${CUDA_ARCH_NAME};\")\n    string(REPLACE \";\" \", \" __archs_names \"${__archs_names}\")\n    message(FATAL_ERROR \"Invalid CUDA_ARCH_NAME, supported values: ${__archs_names}. 
Got ${CUDA_ARCH_NAME}\")\n  endif()\n\n  if(${CUDA_ARCH_NAME} STREQUAL \"Manual\")\n    set(CUDA_ARCH_BIN \"\" CACHE STRING\n      \"Specify GPU architectures to build binaries for (BIN(PTX) format is supported)\")\n    set(CUDA_ARCH_PTX \"\" CACHE STRING\n      \"Specify GPU architectures to build PTX intermediate code for\")\n    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)\n  else()\n    unset(CUDA_ARCH_BIN CACHE)\n    unset(CUDA_ARCH_PTX CACHE)\n  endif()\n\n  if(${CUDA_ARCH_NAME} STREQUAL \"Kepler\")\n    set(__cuda_arch_bin \"30 35\")\n  elseif(${CUDA_ARCH_NAME} STREQUAL \"Maxwell\")\n    set(__cuda_arch_bin \"50\")\n  elseif(${CUDA_ARCH_NAME} STREQUAL \"Pascal\")\n    set(__cuda_arch_bin \"60 61\")\n  elseif(${CUDA_ARCH_NAME} STREQUAL \"Volta\")\n    set(__cuda_arch_bin \"70\")\n  elseif(${CUDA_ARCH_NAME} STREQUAL \"All\")\n    set(__cuda_arch_bin ${Caffe2_known_gpu_archs})\n  elseif(${CUDA_ARCH_NAME} STREQUAL \"Manual\")\n    set(__cuda_arch_bin ${CUDA_ARCH_BIN})\n    set(__cuda_arch_ptx ${CUDA_ARCH_PTX})\n  else()\n    message(FATAL_ERROR \"Invalid CUDA_ARCH_NAME\")\n  endif()\n\n  # Remove dots and convert to lists\n  string(REGEX REPLACE \"\\\\.\" \"\" __cuda_arch_bin \"${__cuda_arch_bin}\")\n  string(REGEX REPLACE \"\\\\.\" \"\" __cuda_arch_ptx \"${__cuda_arch_ptx}\")\n  string(REGEX MATCHALL \"[0-9()]+\" __cuda_arch_bin \"${__cuda_arch_bin}\")\n  string(REGEX MATCHALL \"[0-9]+\"   __cuda_arch_ptx \"${__cuda_arch_ptx}\")\n  list(REMOVE_DUPLICATES __cuda_arch_bin)\n  list(REMOVE_DUPLICATES __cuda_arch_ptx)\n\n  set(__nvcc_flags \"\")\n  set(__nvcc_archs_readable \"\")\n\n  # Tell NVCC to add binaries for the specified GPUs\n  foreach(__arch ${__cuda_arch_bin})\n    if(__arch MATCHES \"([0-9]+)\\\\(([0-9]+)\\\\)\")\n      # User explicitly specified PTX for the concrete BIN\n      list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})\n      list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1})\n    else()\n      # 
User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN\n      list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch})\n      list(APPEND __nvcc_archs_readable sm_${__arch})\n    endif()\n  endforeach()\n\n  # Tell NVCC to add PTX intermediate code for the specified architectures\n  foreach(__arch ${__cuda_arch_ptx})\n    list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch})\n    list(APPEND __nvcc_archs_readable compute_${__arch})\n  endforeach()\n\n  string(REPLACE \";\" \" \" __nvcc_archs_readable \"${__nvcc_archs_readable}\")\n  set(${out_variable}          ${__nvcc_flags}          PARENT_SCOPE)\n  set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE)\nendfunction()\n\n\n################################################################################################\n# Short command for cuda compilation\n# Usage:\n#   caffe_cuda_compile(<objlist_variable> <cuda_files>)\nmacro(caffe2_cuda_compile objlist_variable)\n  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)\n    set(${var}_backup_in_cuda_compile_ \"${${var}}\")\n\n    # we remove /EHa as it generates warnings under windows\n    string(REPLACE \"/EHa\" \"\" ${var} \"${${var}}\")\n\n  endforeach()\n\n  if(APPLE)\n    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function)\n  endif()\n\n  cuda_compile(cuda_objcs ${ARGN})\n\n  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)\n    set(${var} \"${${var}_backup_in_cuda_compile_}\")\n    unset(${var}_backup_in_cuda_compile_)\n  endforeach()\n\n  set(${objlist_variable} ${cuda_objcs})\nendmacro()\n\n################################################################################################\n###  Non macro section\n################################################################################################\n\n# Special care for windows platform: we know that 32-bit windows does not support cuda.\nif(${CMAKE_SYSTEM_NAME} 
STREQUAL \"Windows\")\n  if(NOT (CMAKE_SIZEOF_VOID_P EQUAL 8))\n    message(FATAL_ERROR\n            \"CUDA support not available with 32-bit windows. Did you \"\n            \"forget to set Win64 in the generator target?\")\n    return()\n  endif()\nendif()\n\nfind_package(CUDA 7.0 QUIET)\nfind_cuda_helper_libs(curand)  # cmake 2.8.7 compatibility which doesn't search for curand\n\nif(NOT CUDA_FOUND)\n  set(HAVE_CUDA FALSE)\n  return()\nendif()\n\nset(HAVE_CUDA TRUE)\nmessage(STATUS \"CUDA detected: \" ${CUDA_VERSION})\nif (${CUDA_VERSION} LESS 7.0)\n  message(FATAL_ERROR \"Caffe2 requires CUDA 7.0 or later version\")\nelseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x\n  set(Caffe2_known_gpu_archs ${Caffe2_known_gpu_archs7})\n  list(APPEND CUDA_NVCC_FLAGS \"-D_MWAITXINTRIN_H_INCLUDED\")\n  list(APPEND CUDA_NVCC_FLAGS \"-D__STRICT_ANSI__\")\nelseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x\n  set(Caffe2_known_gpu_archs ${Caffe2_known_gpu_archs8})\n  list(APPEND CUDA_NVCC_FLAGS \"-D_MWAITXINTRIN_H_INCLUDED\")\n  list(APPEND CUDA_NVCC_FLAGS \"-D__STRICT_ANSI__\")\n  # CUDA 8 may complain that sm_20 is no longer supported. Suppress the\n  # warning for now.\n  list(APPEND CUDA_NVCC_FLAGS \"-Wno-deprecated-gpu-targets\")\nendif()\n\ncaffe2_include_directories(${CUDA_INCLUDE_DIRS})\nlist(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_CUDART_LIBRARY}\n                              ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})\n\n# find libcuda.so and libnvrtc.so\n# For libcuda.so, we will find it under lib, lib64, and then the\n# stubs folder, in case we are building on a system that does not\n# have cuda driver installed. 
On windows, we also search under the\n# folder lib/x64.\n\nfind_library(CUDA_CUDA_LIB cuda\n    PATHS ${CUDA_TOOLKIT_ROOT_DIR}\n    PATH_SUFFIXES lib lib64 lib/stubs lib64/stubs lib/x64)\nfind_library(CUDA_NVRTC_LIB nvrtc\n    PATHS ${CUDA_TOOLKIT_ROOT_DIR}\n    PATH_SUFFIXES lib lib64 lib/x64)\n\n# setting nvcc arch flags\ncaffe2_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)\nlist(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})\nmessage(STATUS \"Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}\")\n\nif(CUDA_CUDA_LIB)\n    message(STATUS \"Found libcuda: ${CUDA_CUDA_LIB}\")\n    list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_CUDA_LIB})\nelse()\n    message(FATAL_ERROR \"Cannot find libcuda.so. Please file an issue on https://github.com/caffe2/caffe2 with your build output.\")\nendif()\nif(CUDA_NVRTC_LIB)\n  message(STATUS \"Found libnvrtc: ${CUDA_NVRTC_LIB}\")\n  list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_NVRTC_LIB})\nelse()\n    message(FATAL_ERROR \"Cannot find libnvrtc.so. Please file an issue on https://github.com/caffe2/caffe2 with your build output.\")\nendif()\n\n# disable some nvcc diagnostics that appear in boost, glog, gflags, opencv, etc.\nforeach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used)\n  list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag})\nendforeach()\n\n# Set C++14 support\nset(CUDA_PROPAGATE_HOST_FLAGS OFF)\nif (NOT MSVC)\n  list(APPEND CUDA_NVCC_FLAGS \"-std=c++14\")\n  list(APPEND CUDA_NVCC_FLAGS \"-Xcompiler -fPIC\")\nendif()\n\n# Debug and Release symbol support\nif (MSVC)\n  if (${CMAKE_BUILD_TYPE} MATCHES \"Release\")\n    if (${BUILD_SHARED_LIBS})\n      list(APPEND CUDA_NVCC_FLAGS \"-Xcompiler -MD\")\n    else()\n      list(APPEND CUDA_NVCC_FLAGS \"-Xcompiler -MT\")\n    endif()\n  elseif(${CMAKE_BUILD_TYPE} MATCHES \"Debug\")\n    message(FATAL_ERROR\n            \"Caffe2 currently does not support the combination of MSVC, Cuda \"\n            \"and Debug mode. 
Either set USE_CUDA=OFF or set the build type \"\n            \"to Release\")\n    if (${BUILD_SHARED_LIBS})\n      list(APPEND CUDA_NVCC_FLAGS \"-Xcompiler -MDd\")\n    else()\n      list(APPEND CUDA_NVCC_FLAGS \"-Xcompiler -MTd\")\n    endif()\n  else()\n    message(FATAL_ERROR \"Unknown cmake build type: \" ${CMAKE_BUILD_TYPE})\n  endif()\nendif()\n\n\nif(OpenMP_FOUND)\n  list(APPEND CUDA_NVCC_FLAGS \"-Xcompiler ${OpenMP_CXX_FLAGS}\")\nendif()\n\n# Set :expt-relaxed-constexpr to suppress Eigen warnings\nlist(APPEND CUDA_NVCC_FLAGS \"--expt-relaxed-constexpr\")\n\nmark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)\nmark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)\n"
  },
  {
    "path": "cmake/legacy/Dependencies.cmake",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Dependencies.cmake\n\n# Find CUDA.\ninclude(cmake/legacy/Cuda.cmake)\nif (HAVE_CUDA)\n  # CUDA 9.x requires GCC version <= 6\n  if ((CUDA_VERSION VERSION_EQUAL   9.0) OR\n      (CUDA_VERSION VERSION_GREATER 9.0  AND CUDA_VERSION VERSION_LESS 10.0))\n    if (CMAKE_C_COMPILER_ID STREQUAL \"GNU\" AND\n        NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 7.0 AND\n        CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER)\n      message(FATAL_ERROR\n        \"CUDA ${CUDA_VERSION} is not compatible with GCC version >= 7. \"\n        \"Use the following option to use another version (for example): \\n\"\n        \"  -DCUDA_HOST_COMPILER=/usr/bin/gcc-6\\n\")\n    endif()\n  # CUDA 8.0 requires GCC version <= 5\n  elseif (CUDA_VERSION VERSION_EQUAL 8.0)\n    if (CMAKE_C_COMPILER_ID STREQUAL \"GNU\" AND\n        NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 6.0 AND\n        CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER)\n      message(FATAL_ERROR\n        \"CUDA 8.0 is not compatible with GCC version >= 6. 
\"\n        \"Use the following option to use another version (for example): \\n\"\n        \"  -DCUDA_HOST_COMPILER=/usr/bin/gcc-5\\n\")\n    endif()\n  endif()\nendif()\n\n# Find CUDNN.\nif (HAVE_CUDA)\n  find_package(CuDNN REQUIRED)\n  if (CUDNN_FOUND)\n    caffe2_include_directories(${CUDNN_INCLUDE_DIRS})\n  endif()\nendif()\n"
  },
  {
    "path": "cmake/legacy/Modules/FindCuDNN.cmake",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Modules/FindCuDNN.cmake\n\n# - Try to find cuDNN\n#\n# The following variables are optionally searched for defaults\n#  CUDNN_ROOT_DIR:            Base directory where all cuDNN components are found\n#\n# The following are set after configuration is done:\n#  CUDNN_FOUND\n#  CUDNN_INCLUDE_DIRS\n#  CUDNN_LIBRARIES\n#  CUDNN_LIBRARY_DIRS\n\ninclude(FindPackageHandleStandardArgs)\n\nset(CUDNN_ROOT_DIR \"\" CACHE PATH \"Folder contains NVIDIA cuDNN\")\n\nfind_path(CUDNN_INCLUDE_DIR cudnn.h\n    HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}\n    PATH_SUFFIXES cuda/include include)\n\nfind_library(CUDNN_LIBRARY cudnn\n    HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}\n    PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)\n\nfind_package_handle_standard_args(\n    CUDNN DEFAULT_MSG CUDNN_INCLUDE_DIR CUDNN_LIBRARY)\n\nif(CUDNN_FOUND)\n\t# get cuDNN version\n  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS)\n\tstring(REGEX MATCH \"define CUDNN_MAJOR * +([0-9]+)\"\n\t\t\t\t CUDNN_VERSION_MAJOR \"${CUDNN_HEADER_CONTENTS}\")\n\tstring(REGEX REPLACE \"define CUDNN_MAJOR * +([0-9]+)\" \"\\\\1\"\n\t\t\t\t CUDNN_VERSION_MAJOR \"${CUDNN_VERSION_MAJOR}\")\n\tstring(REGEX MATCH \"define CUDNN_MINOR * 
+([0-9]+)\"\n\t\t\t\t CUDNN_VERSION_MINOR \"${CUDNN_HEADER_CONTENTS}\")\n\tstring(REGEX REPLACE \"define CUDNN_MINOR * +([0-9]+)\" \"\\\\1\"\n\t\t\t\t CUDNN_VERSION_MINOR \"${CUDNN_VERSION_MINOR}\")\n\tstring(REGEX MATCH \"define CUDNN_PATCHLEVEL * +([0-9]+)\"\n\t\t\t\t CUDNN_VERSION_PATCH \"${CUDNN_HEADER_CONTENTS}\")\n\tstring(REGEX REPLACE \"define CUDNN_PATCHLEVEL * +([0-9]+)\" \"\\\\1\"\n\t\t\t\t CUDNN_VERSION_PATCH \"${CUDNN_VERSION_PATCH}\")\n  # Assemble cuDNN version\n  if(NOT CUDNN_VERSION_MAJOR)\n    set(CUDNN_VERSION \"?\")\n  else()\n    set(CUDNN_VERSION \"${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}\")\n  endif()\n\n  set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})\n  set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})\n  message(STATUS \"Found cuDNN: v${CUDNN_VERSION}  (include: ${CUDNN_INCLUDE_DIR}, library: ${CUDNN_LIBRARY})\")\n  mark_as_advanced(CUDNN_ROOT_DIR CUDNN_LIBRARY CUDNN_INCLUDE_DIR)\nendif()\n"
  },
  {
    "path": "cmake/legacy/Summary.cmake",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake\n\n# Prints configuration summary.\nfunction (detectron_print_config_summary)\n  message(STATUS \"Summary:\")\n  message(STATUS \"  CMake version        : ${CMAKE_VERSION}\")\n  message(STATUS \"  CMake command        : ${CMAKE_COMMAND}\")\n  message(STATUS \"  System name          : ${CMAKE_SYSTEM_NAME}\")\n  message(STATUS \"  C++ compiler         : ${CMAKE_CXX_COMPILER}\")\n  message(STATUS \"  C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}\")\n  message(STATUS \"  CXX flags            : ${CMAKE_CXX_FLAGS}\")\n  message(STATUS \"  Caffe2 version       : ${CAFFE2_VERSION}\")\n  message(STATUS \"  Caffe2 include path  : ${CAFFE2_INCLUDE_DIRS}\")\n  message(STATUS \"  Have CUDA            : ${HAVE_CUDA}\")\n  if (${HAVE_CUDA})\n    message(STATUS \"    CUDA version       : ${CUDA_VERSION}\")\n    message(STATUS \"    CuDNN version      : ${CUDNN_VERSION}\")\n  endif()\nendfunction()\n"
  },
  {
    "path": "cmake/legacy/Utils.cmake",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Utils.cmake\n\n################################################################################################\n# Exclude and prepend functionalities\nfunction (exclude OUTPUT INPUT)\nset(EXCLUDES ${ARGN})\nforeach(EXCLUDE ${EXCLUDES})\n        list(REMOVE_ITEM INPUT \"${EXCLUDE}\")\nendforeach()\nset(${OUTPUT} ${INPUT} PARENT_SCOPE)\nendfunction(exclude)\n\nfunction (prepend OUTPUT PREPEND)\nset(OUT \"\")\nforeach(ITEM ${ARGN})\n        list(APPEND OUT \"${PREPEND}${ITEM}\")\nendforeach()\nset(${OUTPUT} ${OUT} PARENT_SCOPE)\nendfunction(prepend)\n\n\n################################################################################################\n# Clears variables from list\n# Usage:\n#   caffe_clear_vars(<variables_list>)\nmacro(caffe_clear_vars)\n  foreach(_var ${ARGN})\n    unset(${_var})\n  endforeach()\nendmacro()\n\n################################################################################################\n# Prints list element per line\n# Usage:\n#   caffe_print_list(<list>)\nfunction(caffe_print_list)\n  foreach(e ${ARGN})\n    message(STATUS ${e})\n  endforeach()\nendfunction()\n\n################################################################################################\n# Reads set of 
version defines from the header file\n# Usage:\n#   caffe_parse_header(<file> <define1> <define2> <define3> ..)\nmacro(caffe_parse_header FILENAME FILE_VAR)\n  set(vars_regex \"\")\n  set(__parnet_scope OFF)\n  set(__add_cache OFF)\n  foreach(name ${ARGN})\n    if(\"${name}\" STREQUAL \"PARENT_SCOPE\")\n      set(__parnet_scope ON)\n    elseif(\"${name}\" STREQUAL \"CACHE\")\n      set(__add_cache ON)\n    elseif(vars_regex)\n      set(vars_regex \"${vars_regex}|${name}\")\n    else()\n      set(vars_regex \"${name}\")\n    endif()\n  endforeach()\n  if(EXISTS \"${FILENAME}\")\n    file(STRINGS \"${FILENAME}\" ${FILE_VAR} REGEX \"#define[ \\t]+(${vars_regex})[ \\t]+[0-9]+\" )\n  else()\n    unset(${FILE_VAR})\n  endif()\n  foreach(name ${ARGN})\n    if(NOT \"${name}\" STREQUAL \"PARENT_SCOPE\" AND NOT \"${name}\" STREQUAL \"CACHE\")\n      if(${FILE_VAR})\n        if(${FILE_VAR} MATCHES \".+[ \\t]${name}[ \\t]+([0-9]+).*\")\n          string(REGEX REPLACE \".+[ \\t]${name}[ \\t]+([0-9]+).*\" \"\\\\1\" ${name} \"${${FILE_VAR}}\")\n        else()\n          set(${name} \"\")\n        endif()\n        if(__add_cache)\n          set(${name} ${${name}} CACHE INTERNAL \"${name} parsed from ${FILENAME}\" FORCE)\n        elseif(__parnet_scope)\n          set(${name} \"${${name}}\" PARENT_SCOPE)\n        endif()\n      else()\n        unset(${name} CACHE)\n      endif()\n    endif()\n  endforeach()\nendmacro()\n\n################################################################################################\n# Reads single version define from the header file and parses it\n# Usage:\n#   caffe_parse_header_single_define(<library_name> <file> <define_name>)\nfunction(caffe_parse_header_single_define LIBNAME HDR_PATH VARNAME)\n  set(${LIBNAME}_H \"\")\n  if(EXISTS \"${HDR_PATH}\")\n    file(STRINGS \"${HDR_PATH}\" ${LIBNAME}_H REGEX \"^#define[ \\t]+${VARNAME}[ \\t]+\\\"[^\\\"]*\\\".*$\" LIMIT_COUNT 1)\n  endif()\n\n  if(${LIBNAME}_H)\n    string(REGEX REPLACE \"^.*[ 
\\t]${VARNAME}[ \\t]+\\\"([0-9]+).*$\" \"\\\\1\" ${LIBNAME}_VERSION_MAJOR \"${${LIBNAME}_H}\")\n    string(REGEX REPLACE \"^.*[ \\t]${VARNAME}[ \\t]+\\\"[0-9]+\\\\.([0-9]+).*$\" \"\\\\1\" ${LIBNAME}_VERSION_MINOR  \"${${LIBNAME}_H}\")\n    string(REGEX REPLACE \"^.*[ \\t]${VARNAME}[ \\t]+\\\"[0-9]+\\\\.[0-9]+\\\\.([0-9]+).*$\" \"\\\\1\" ${LIBNAME}_VERSION_PATCH \"${${LIBNAME}_H}\")\n    set(${LIBNAME}_VERSION_MAJOR ${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE)\n    set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE)\n    set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE)\n    set(${LIBNAME}_VERSION_STRING \"${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}\" PARENT_SCOPE)\n\n    # append a TWEAK version if it exists:\n    set(${LIBNAME}_VERSION_TWEAK \"\")\n    if(\"${${LIBNAME}_H}\" MATCHES \"^.*[ \\t]${VARNAME}[ \\t]+\\\"[0-9]+\\\\.[0-9]+\\\\.[0-9]+\\\\.([0-9]+).*$\")\n      set(${LIBNAME}_VERSION_TWEAK \"${CMAKE_MATCH_1}\" ${ARGN} PARENT_SCOPE)\n    endif()\n    if(${LIBNAME}_VERSION_TWEAK)\n      set(${LIBNAME}_VERSION_STRING \"${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}\" ${ARGN} PARENT_SCOPE)\n    else()\n      set(${LIBNAME}_VERSION_STRING \"${${LIBNAME}_VERSION_STRING}\" ${ARGN} PARENT_SCOPE)\n    endif()\n  endif()\nendfunction()\n\n########################################################################################################\n# An option that the user can select. 
Can accept condition to control when option is available for user.\n# Usage:\n#   caffe_option(<option_variable> \"doc string\" <initial value or boolean expression> [IF <condition>])\nfunction(caffe_option variable description value)\n  set(__value ${value})\n  set(__condition \"\")\n  set(__varname \"__value\")\n  foreach(arg ${ARGN})\n    if(arg STREQUAL \"IF\" OR arg STREQUAL \"if\")\n      set(__varname \"__condition\")\n    else()\n      list(APPEND ${__varname} ${arg})\n    endif()\n  endforeach()\n  unset(__varname)\n  if(\"${__condition}\" STREQUAL \"\")\n    set(__condition 2 GREATER 1)\n  endif()\n\n  if(${__condition})\n    if(\"${__value}\" MATCHES \";\")\n      if(${__value})\n        option(${variable} \"${description}\" ON)\n      else()\n        option(${variable} \"${description}\" OFF)\n      endif()\n    elseif(DEFINED ${__value})\n      if(${__value})\n        option(${variable} \"${description}\" ON)\n      else()\n        option(${variable} \"${description}\" OFF)\n      endif()\n    else()\n      option(${variable} \"${description}\" ${__value})\n    endif()\n  else()\n    unset(${variable} CACHE)\n  endif()\nendfunction()\n\n##############################################################################\n# Helper function to add as-needed flag around a library.\nfunction(caffe_add_as_needed_flag lib output_var)\n  if(\"${CMAKE_CXX_COMPILER_ID}\" MATCHES \"Clang\")\n    # TODO: Clang seems to not need this flag. 
Double check.\n    set(${output_var} ${lib} PARENT_SCOPE)\n  elseif(MSVC)\n    # TODO: check what is the behavior of MSVC.\n    # In MSVC, we will add whole archive in default.\n    set(${output_var} ${lib} PARENT_SCOPE)\n  else()\n    # Assume everything else is like gcc: we will need as-needed flag.\n    set(${output_var} -Wl,--no-as-needed ${lib} -Wl,--as-needed PARENT_SCOPE)\n  endif()\nendfunction()\n\n##############################################################################\n# Helper function to add whole_archive flag around a library.\nfunction(caffe_add_whole_archive_flag lib output_var)\n  if(\"${CMAKE_CXX_COMPILER_ID}\" MATCHES \"Clang\")\n    set(${output_var} -Wl,-force_load,$<TARGET_FILE:${lib}> PARENT_SCOPE)\n  elseif(MSVC)\n    # In MSVC, we will add whole archive in default.\n    set(${output_var} -WHOLEARCHIVE:$<TARGET_FILE:${lib}> PARENT_SCOPE)\n  else()\n    # Assume everything else is like gcc\n    set(${output_var} -Wl,--whole-archive ${lib} -Wl,--no-whole-archive PARENT_SCOPE)\n  endif()\nendfunction()\n\n##############################################################################\n# Helper function to add either as-needed, or whole_archive flag around a library.\nfunction(caffe_add_linker_flag lib output_var)\n  if (BUILD_SHARED_LIBS)\n    caffe_add_as_needed_flag(${lib} tmp)\n  else()\n    caffe_add_whole_archive_flag(${lib} tmp)\n  endif()\n  set(${output_var} ${tmp} PARENT_SCOPE)\nendfunction()\n\n##############################################################################\n# Helper function to automatically generate __init__.py files where python\n# sources reside but there are no __init__.py present.\nfunction(caffe_autogen_init_py_files)\n  file(GLOB_RECURSE all_python_files RELATIVE ${PROJECT_SOURCE_DIR}\n       \"${PROJECT_SOURCE_DIR}/caffe2/*.py\")\n  set(python_paths_need_init_py)\n  foreach(python_file ${all_python_files})\n    get_filename_component(python_path ${python_file} PATH)\n    string(REPLACE \"/\" \";\" 
path_parts ${python_path})\n    set(rebuilt_path ${CMAKE_BINARY_DIR})\n    foreach(path_part ${path_parts})\n      set(rebuilt_path \"${rebuilt_path}/${path_part}\")\n      list(APPEND python_paths_need_init_py ${rebuilt_path})\n    endforeach()\n  endforeach()\n  list(REMOVE_DUPLICATES python_paths_need_init_py)\n  # Since the _pb2.py files are yet to be created, we will need to manually\n  # add them to the list.\n  list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe)\n  list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe/proto)\n  list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe2/proto)\n\n  foreach(tmp ${python_paths_need_init_py})\n    if(NOT EXISTS ${tmp}/__init__.py)\n      # message(STATUS \"Generate \" ${tmp}/__init__.py)\n      file(WRITE ${tmp}/__init__.py \"\")\n    endif()\n  endforeach()\nendfunction()\n\n##############################################################################\n# Creating a Caffe2 binary target with sources specified with relative path.\n# Usage:\n#   caffe2_binary_target(target_name_or_src <src1> [<src2>] [<src3>] ...)\n# If only target_name_or_src is specified, this target is build with one single\n# source file and the target name is autogen from the filename. 
Otherwise, the\n# target name is given by the first argument and the rest are the source files\n# to build the target.\nfunction(caffe2_binary_target target_name_or_src)\n  if (${ARGN})\n    set(__target ${target_name_or_src})\n    prepend(__srcs \"${CMAKE_CURRENT_SOURCE_DIR}/\" \"${ARGN}\")\n  else()\n    get_filename_component(__target ${target_name_or_src} NAME_WE)\n    prepend(__srcs \"${CMAKE_CURRENT_SOURCE_DIR}/\" \"${target_name_or_src}\")\n  endif()\n  add_executable(${__target} ${__srcs})\n  add_dependencies(${__target} ${Caffe2_MAIN_LIBS_ORDER})\n  target_link_libraries(${__target} ${Caffe2_MAIN_LIBS} ${Caffe2_DEPENDENCY_LIBS})\n  install(TARGETS ${__target} DESTINATION bin)\nendfunction()\n\n##############################################################################\n# Helper function to add paths to system include directories.\n#\n# Anaconda distributions typically contain a lot of packages and some\n# of those can conflict with headers/libraries that must be sourced\n# from elsewhere. This helper ensures that Anaconda paths are always\n# added AFTER other include paths, such that it does not accidentally\n# take precedence when it shouldn't.\n#\n# This is just a heuristic and does not have any guarantees. We can\n# add other corner cases here (as long as they are generic enough).\n# A complete include path cross checker is a final resort if this\n# hacky approach proves insufficient.\n#\nfunction(caffe2_include_directories)\n  foreach(path IN LISTS ARGN)\n    if (${path} MATCHES \"/anaconda\")\n      include_directories(AFTER SYSTEM ${path})\n    else()\n      include_directories(BEFORE SYSTEM ${path})\n    endif()\n  endforeach()\nendfunction()\n"
  },
  {
    "path": "cmake/legacy/legacymake.cmake",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# This file contains legacy cmake scripts that is going to be removed\n# in a future release.\n\n# Add CMake modules.\nlist(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/legacy/Modules)\n\n# Add compiler flags.\nset(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -std=c11\")\nset(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC -Wno-narrowing\")\n\n# Include Caffe2 CMake utils.\ninclude(cmake/legacy/Utils.cmake)\n\n# Find dependencies.\ninclude(cmake/legacy/Dependencies.cmake)\n\n# Print configuration summary.\ninclude(cmake/legacy/Summary.cmake)\ndetectron_print_config_summary()\n\n# Collect custom ops sources.\nfile(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cc)\nfile(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cu)\n\n# Install custom CPU ops lib.\nadd_library(\n     caffe2_detectron_custom_ops SHARED\n     ${CUSTOM_OPS_CPU_SRCS})\n\ntarget_include_directories(\n    caffe2_detectron_custom_ops PRIVATE\n    ${CAFFE2_INCLUDE_DIRS})\ntarget_link_libraries(caffe2_detectron_custom_ops caffe2)\ninstall(TARGETS caffe2_detectron_custom_ops DESTINATION lib)\n\n# Install custom GPU ops lib.\nif (${HAVE_CUDA})\n  # Additional -I prefix is required for CMake versions before commit (< 3.7):\n  # 
https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594\n  list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS})\n  CUDA_ADD_LIBRARY(\n      caffe2_detectron_custom_ops_gpu SHARED\n      ${CUSTOM_OPS_CPU_SRCS}\n      ${CUSTOM_OPS_GPU_SRCS})\n\n  target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu)\n  install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib)\nendif()\n"
  },
  {
    "path": "configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\n  USE_GN: True  # Note: use GN on the FPN-specific layers\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform\n  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem\n  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl  # Note: a GN pre-trained model\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 270000\n  STEPS: [0, 210000, 250000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\n  USE_GN: True  # Note: use GN on the FPN-specific layers\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform\n  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem\n  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl  # Note: a GN pre-trained model\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\n  USE_GN: True  # Note: use GN on the FPN-specific layers\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform\n  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem\n  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl  # Note: a GN pre-trained model\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 270000\n  STEPS: [0, 210000, 250000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\n  USE_GN: True  # Note: use GN on the FPN-specific layers\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform\n  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem\n  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl  # Note: a GN pre-trained model\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/04_2018_gn_baselines/mask_rcnn_R-50-FPN_1x_gn.yaml",
    "content": "# WARNING: this script uses **pre-computed** BN-based proposals, and is for quick debugging only.\nMODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\n  USE_GN: True  # Note: use GN on the FPN-specific layers\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform\n  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem\n  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl  # Note: a GN pre-trained model\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  
DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 270000\n  STEPS: [0, 210000, 250000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\n  USE_GN: True  # Note: use GN on the FPN-specific layers\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform\n  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem\n  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  # WEIGHTS: N/A\n  FREEZE_AT: 0\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 270000\n  STEPS: [0, 210000, 250000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\n  USE_GN: True  # Note: use GN on the FPN-specific layers\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_gn_transformation  # Note: this is a GN bottleneck transform\n  STEM_FUNC: basic_gn_stem  # Note: this is a GN stem\n  SHORTCUT_FUNC: basic_gn_shortcut  # Note: this is a GN shortcut\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head  # Note: this is a Conv GN head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn  # Note: this is a GN mask head\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  # WEIGHTS: N/A\n  FREEZE_AT: 0\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nFAST_RCNN:\n  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head\n  ROI_XFORM_METHOD: RoIAlign\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 6000\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nFAST_RCNN:\n  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head\n  ROI_XFORM_METHOD: RoIAlign\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 6000\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  FASTER_RCNN: True\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  FASTER_RCNN: True\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 130000\n  STEPS: [0, 100000, 120000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 2\n  FASTER_RCNN: True\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 2\n  FASTER_RCNN: True\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 130000\n  STEPS: [0, 100000, 120000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  FASTER_RCNN: True\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  FASTER_RCNN: True\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 130000\n  STEPS: [0, 100000, 120000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  FASTER_RCNN: True\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  FASTER_RCNN: True\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 130000\n  STEPS: [0, 100000, 120000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nFAST_RCNN:\n  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head\n  ROI_XFORM_METHOD: RoIAlign\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare\n  RESOLUTION: 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default: GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 6000\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nFAST_RCNN:\n  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head\n  ROI_XFORM_METHOD: RoIAlign\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare\n  RESOLUTION: 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default: GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 6000\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet152_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 260000\n  STEPS: [0, 200000, 240000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (640, 672, 704, 736, 768, 800)  # Scale jitter\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  BBOX_VOTE:\n    ENABLED: True\n    VOTE_TH: 0.9\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\n  BBOX_AUG:\n    ENABLED: True\n    SCORE_HEUR: UNION\n    COORD_HEUR: UNION\n    H_FLIP: True\n    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)\n    MAX_SIZE: 2000\n    SCALE_H_FLIP: True\n    SCALE_SIZE_DEP: False\n    ASPECT_RATIOS: ()\n    ASPECT_RATIO_H_FLIP: False\n  MASK_AUG:\n    ENABLED: True\n    HEUR: SOFT_AVG\n    H_FLIP: True\n    SCALES: (400, 500, 600, 700, 
900, 1000, 1100, 1200)\n    MAX_SIZE: 2000\n    SCALE_H_FLIP: True\n    SCALE_SIZE_DEP: False\n    ASPECT_RATIOS: ()\n    ASPECT_RATIO_H_FLIP: False\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nFAST_RCNN:\n  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head\n  ROI_XFORM_METHOD: RoIAlign\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nFAST_RCNN:\n  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head\n  ROI_XFORM_METHOD: RoIAlign\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 130000\n  STEPS: [0, 100000, 120000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 130000\n  STEPS: [0, 100000, 120000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 130000\n  STEPS: [0, 100000, 120000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 130000\n  STEPS: [0, 100000, 120000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: head_builder.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nFAST_RCNN:\n  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head\n  ROI_XFORM_METHOD: RoIAlign\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare\n  RESOLUTION: 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default: GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nFAST_RCNN:\n  ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head\n  ROI_XFORM_METHOD: RoIAlign\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare\n  RESOLUTION: 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default: GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 
0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 
0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  
PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  # 2x schedule (note TRAIN.IMS_PER_BATCH: 1)\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 360000\n  STEPS: [0, 240000, 320000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  IMS_PER_BATCH: 1\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  
PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/retinanet_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: retinanet\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 7\n  RPN_MIN_LEVEL: 3\n  COARSEST_STRIDE: 128\n  EXTRA_CONV_LEVELS: True\nRETINANET:\n  RETINANET_ON: True\n  NUM_CONVS: 4\n  ASPECT_RATIOS: (1.0, 2.0, 0.5)\n  SCALES_PER_OCTAVE: 3\n  ANCHOR_SCALE: 4\n  LOSS_GAMMA: 2.0\n  LOSS_ALPHA: 0.25\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  RPN_STRADDLE_THRESH: -1  # default 0\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/retinanet_R-101-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: retinanet\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 7\n  RPN_MIN_LEVEL: 3\n  COARSEST_STRIDE: 128\n  EXTRA_CONV_LEVELS: True\nRETINANET:\n  RETINANET_ON: True\n  NUM_CONVS: 4\n  ASPECT_RATIOS: (1.0, 2.0, 0.5)\n  SCALES_PER_OCTAVE: 3\n  ANCHOR_SCALE: 4\n  LOSS_GAMMA: 2.0\n  LOSS_ALPHA: 0.25\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  RPN_STRADDLE_THRESH: -1  # default 0\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/retinanet_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: retinanet\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 7\n  RPN_MIN_LEVEL: 3\n  COARSEST_STRIDE: 128\n  EXTRA_CONV_LEVELS: True\nRETINANET:\n  RETINANET_ON: True\n  NUM_CONVS: 4\n  ASPECT_RATIOS: (1.0, 2.0, 0.5)\n  SCALES_PER_OCTAVE: 3\n  ANCHOR_SCALE: 4\n  LOSS_GAMMA: 2.0\n  LOSS_ALPHA: 0.25\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  RPN_STRADDLE_THRESH: -1  # default 0\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/retinanet_R-50-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: retinanet\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 7\n  RPN_MIN_LEVEL: 3\n  COARSEST_STRIDE: 128\n  EXTRA_CONV_LEVELS: True\nRETINANET:\n  RETINANET_ON: True\n  NUM_CONVS: 4\n  ASPECT_RATIOS: (1.0, 2.0, 0.5)\n  SCALES_PER_OCTAVE: 3\n  ANCHOR_SCALE: 4\n  LOSS_GAMMA: 2.0\n  LOSS_ALPHA: 0.25\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  RPN_STRADDLE_THRESH: -1  # default 0\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: retinanet\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 7\n  RPN_MIN_LEVEL: 3\n  COARSEST_STRIDE: 128\n  EXTRA_CONV_LEVELS: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nRETINANET:\n  RETINANET_ON: True\n  NUM_CONVS: 4\n  ASPECT_RATIOS: (1.0, 2.0, 0.5)\n  SCALES_PER_OCTAVE: 3\n  ANCHOR_SCALE: 4\n  LOSS_GAMMA: 2.0\n  LOSS_ALPHA: 0.25\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  RPN_STRADDLE_THRESH: -1  # default 0\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: retinanet\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 7\n  RPN_MIN_LEVEL: 3\n  COARSEST_STRIDE: 128\n  EXTRA_CONV_LEVELS: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nRETINANET:\n  RETINANET_ON: True\n  NUM_CONVS: 4\n  ASPECT_RATIOS: (1.0, 2.0, 0.5)\n  SCALES_PER_OCTAVE: 3\n  ANCHOR_SCALE: 4\n  LOSS_GAMMA: 2.0\n  LOSS_ALPHA: 0.25\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  RPN_STRADDLE_THRESH: -1  # default 0\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: retinanet\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 7\n  RPN_MIN_LEVEL: 3\n  COARSEST_STRIDE: 128\n  EXTRA_CONV_LEVELS: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nRETINANET:\n  RETINANET_ON: True\n  NUM_CONVS: 4\n  ASPECT_RATIOS: (1.0, 2.0, 0.5)\n  SCALES_PER_OCTAVE: 3\n  ANCHOR_SCALE: 4\n  LOSS_GAMMA: 2.0\n  LOSS_ALPHA: 0.25\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  RPN_STRADDLE_THRESH: -1  # default 0\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: retinanet\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 7\n  RPN_MIN_LEVEL: 3\n  COARSEST_STRIDE: 128\n  EXTRA_CONV_LEVELS: True\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nRETINANET:\n  RETINANET_ON: True\n  NUM_CONVS: 4\n  ASPECT_RATIOS: (1.0, 2.0, 0.5)\n  SCALES_PER_OCTAVE: 3\n  ANCHOR_SCALE: 4\n  LOSS_GAMMA: 2.0\n  LOSS_ALPHA: 0.25\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  RPN_STRADDLE_THRESH: -1  # default 0\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 10000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 6\n  RPN_MIN_LEVEL: 2\n  RPN_ANCHOR_START_SIZE: 32\n  RPN_ASPECT_RATIOS: (0.5, 1, 2)\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')\n  SCALE: 800\n  MAX_SIZE: 1333\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_R-50-C4_1x.yaml",
    "content": "MODEL:\n  TYPE: rpn\n  CONV_BODY: ResNet.add_ResNet50_conv4_body\n  NUM_CLASSES: 81\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nRPN:\n  SIZES: (32, 64, 128, 256, 512)\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')\n  SCALE: 800\n  MAX_SIZE: 1333\nUSE_NCCL: False\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 6\n  RPN_MIN_LEVEL: 2\n  RPN_ANCHOR_START_SIZE: 32\n  RPN_ASPECT_RATIOS: (0.5, 1, 2)\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')\n  SCALE: 800\n  MAX_SIZE: 1333\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 6\n  RPN_MIN_LEVEL: 2\n  RPN_ANCHOR_START_SIZE: 32\n  RPN_ASPECT_RATIOS: (0.5, 1, 2)\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')\n  SCALE: 800\n  MAX_SIZE: 1333\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 81\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 6\n  RPN_MIN_LEVEL: 2\n  RPN_ANCHOR_START_SIZE: 32\n  RPN_ASPECT_RATIOS: (0.5, 1, 2)\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival')\n  SCALE: 800\n  MAX_SIZE: 1333\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 6\n  RPN_MIN_LEVEL: 2\n  RPN_ANCHOR_START_SIZE: 32\n  RPN_ASPECT_RATIOS: (0.5, 1, 2)\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test')\n  SCALE: 800\n  MAX_SIZE: 1333\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 2\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 6\n  RPN_MIN_LEVEL: 2\n  RPN_ANCHOR_START_SIZE: 32\n  RPN_ASPECT_RATIOS: (0.5, 1, 2)\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test')\n  SCALE: 800\n  MAX_SIZE: 1333\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 6\n  RPN_MIN_LEVEL: 2\n  RPN_ANCHOR_START_SIZE: 32\n  RPN_ASPECT_RATIOS: (0.5, 1, 2)\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 32\n  WIDTH_PER_GROUP: 8\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test')\n  SCALE: 800\n  MAX_SIZE: 1333\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet101_conv5_body\n  NUM_CLASSES: 2\n  RPN_ONLY: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_RPN: True\n  RPN_MAX_LEVEL: 6\n  RPN_MIN_LEVEL: 2\n  RPN_ANCHOR_START_SIZE: 32\n  RPN_ASPECT_RATIOS: (0.5, 1, 2)\nRESNETS:\n  STRIDE_1X1: False  # default True for MSRA; False for C2 or Torch models\n  TRANS_FUNC: bottleneck_transformation\n  NUM_GROUPS: 64\n  WIDTH_PER_GROUP: 4\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test')\n  SCALE: 800\n  MAX_SIZE: 1333\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 2000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 1\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.0025\n  GAMMA: 0.1\n  MAX_ITER: 60000\n  STEPS: [0, 30000, 40000]\n  # Equivalent schedules with...\n  # 1 GPU:\n  #   BASE_LR: 0.0025\n  #   MAX_ITER: 60000\n  #   STEPS: [0, 30000, 40000]\n  # 2 GPUs:\n  #   BASE_LR: 0.005\n  #   MAX_ITER: 30000\n  #   STEPS: [0, 15000, 20000]\n  # 4 GPUs:\n  #   BASE_LR: 0.01\n  #   MAX_ITER: 15000\n  #   STEPS: [0, 7500, 10000]\n  # 8 GPUs:\n  #   BASE_LR: 0.02\n  #   MAX_ITER: 7500\n  #   STEPS: [0, 3750, 5000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train',)\n  SCALES: (500,)\n  MAX_SIZE: 833\n  BATCH_SIZE_PER_IM: 256\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 500\n  MAX_SIZE: 833\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 2\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.005\n  GAMMA: 0.1\n  MAX_ITER: 30000\n  STEPS: [0, 15000, 20000]\n  # Equivalent schedules with...\n  # 1 GPU:\n  #   BASE_LR: 0.0025\n  #   MAX_ITER: 60000\n  #   STEPS: [0, 30000, 40000]\n  # 2 GPUs:\n  #   BASE_LR: 0.005\n  #   MAX_ITER: 30000\n  #   STEPS: [0, 15000, 20000]\n  # 4 GPUs:\n  #   BASE_LR: 0.01\n  #   MAX_ITER: 15000\n  #   STEPS: [0, 7500, 10000]\n  # 8 GPUs:\n  #   BASE_LR: 0.02\n  #   MAX_ITER: 7500\n  #   STEPS: [0, 3750, 5000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train',)\n  SCALES: (500,)\n  MAX_SIZE: 833\n  BATCH_SIZE_PER_IM: 256\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 500\n  MAX_SIZE: 833\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 4\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.01\n  GAMMA: 0.1\n  MAX_ITER: 15000\n  STEPS: [0, 7500, 10000]\n  # Equivalent schedules with...\n  # 1 GPU:\n  #   BASE_LR: 0.0025\n  #   MAX_ITER: 60000\n  #   STEPS: [0, 30000, 40000]\n  # 2 GPUs:\n  #   BASE_LR: 0.005\n  #   MAX_ITER: 30000\n  #   STEPS: [0, 15000, 20000]\n  # 4 GPUs:\n  #   BASE_LR: 0.01\n  #   MAX_ITER: 15000\n  #   STEPS: [0, 7500, 10000]\n  # 8 GPUs:\n  #   BASE_LR: 0.02\n  #   MAX_ITER: 7500\n  #   STEPS: [0, 3750, 5000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train',)\n  SCALES: (500,)\n  MAX_SIZE: 833\n  BATCH_SIZE_PER_IM: 256\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 500\n  MAX_SIZE: 833\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml",
    "content": "MODEL:\n  TYPE: generalized_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 7500\n  STEPS: [0, 3750, 5000]\n  # Equivalent schedules with...\n  # 1 GPU:\n  #   BASE_LR: 0.0025\n  #   MAX_ITER: 60000\n  #   STEPS: [0, 30000, 40000]\n  # 2 GPUs:\n  #   BASE_LR: 0.005\n  #   MAX_ITER: 30000\n  #   STEPS: [0, 15000, 20000]\n  # 4 GPUs:\n  #   BASE_LR: 0.01\n  #   MAX_ITER: 15000\n  #   STEPS: [0, 7500, 10000]\n  # 8 GPUs:\n  #   BASE_LR: 0.02\n  #   MAX_ITER: 7500\n  #   STEPS: [0, 3750, 5000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train',)\n  SCALES: (500,)\n  MAX_SIZE: 833\n  BATCH_SIZE_PER_IM: 256\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 500\n  MAX_SIZE: 833\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml",
    "content": "MODEL:\n  TYPE: mask_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 81\n  FASTER_RCNN: True\n  MASK_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 180000\n  STEPS: [0, 120000, 160000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nMRCNN:\n  ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs\n  RESOLUTION: 28  # (output mask resolution) default 14\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14  # default 7\n  ROI_XFORM_SAMPLING_RATIO: 2  # default 0\n  DILATION: 1  # default 2\n  CONV_INIT: MSRAFill  # default GaussianFill\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('coco_2014_train', 'coco_2014_valminusminival')\n  SCALES: (800,)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\n  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level\nTEST:\n  DATASETS: ('coco_2014_minival',)\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level\n  RPN_POST_NMS_TOP_N: 1000\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl\n\n  # -- Test time augmentation example -- #\n  BBOX_AUG:\n    ENABLED: True\n    SCORE_HEUR: UNION  # AVG NOTE: cannot use AVG for e2e model\n    COORD_HEUR: UNION  # AVG NOTE: cannot use AVG for e2e model\n    H_FLIP: True\n    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)\n    MAX_SIZE: 2000\n    SCALE_H_FLIP: True\n    SCALE_SIZE_DEP: False\n    AREA_TH_LO: 2500   # 50^2\n    AREA_TH_HI: 32400  # 180^2\n    ASPECT_RATIOS: ()\n    ASPECT_RATIO_H_FLIP: False\n  MASK_AUG:\n    ENABLED: True\n    HEUR: 
SOFT_AVG\n    H_FLIP: True\n    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)\n    MAX_SIZE: 2000\n    SCALE_H_FLIP: True\n    SCALE_SIZE_DEP: False\n    AREA_TH: 32400  # 180^2\n    ASPECT_RATIOS: ()\n    ASPECT_RATIO_H_FLIP: False\n  BBOX_VOTE:\n    ENABLED: True\n    VOTE_TH: 0.9\n  # -- Test time augmentation example -- #\n\nUSE_NCCL: False\nOUTPUT_DIR: .\n"
  },
  {
    "path": "configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml",
    "content": "MODEL:\n  TYPE: keypoint_rcnn\n  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body\n  NUM_CLASSES: 2\n  KEYPOINTS_ON: True\nNUM_GPUS: 8\nSOLVER:\n  WEIGHT_DECAY: 0.0001\n  LR_POLICY: steps_with_decay\n  BASE_LR: 0.02\n  GAMMA: 0.1\n  MAX_ITER: 90000\n  STEPS: [0, 60000, 80000]\nFPN:\n  FPN_ON: True\n  MULTILEVEL_ROIS: True\n  MULTILEVEL_RPN: True  # accidentally True; disable in the future\nFAST_RCNN:\n  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 7\n  ROI_XFORM_SAMPLING_RATIO: 2\nKRCNN:\n  ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX\n  NUM_STACKED_CONVS: 8\n  NUM_KEYPOINTS: 17\n  USE_DECONV_OUTPUT: True\n  CONV_INIT: MSRAFill\n  CONV_HEAD_DIM: 512\n  UP_SCALE: 2\n  HEATMAP_SIZE: 56  # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2)\n  ROI_XFORM_METHOD: RoIAlign\n  ROI_XFORM_RESOLUTION: 14\n  ROI_XFORM_SAMPLING_RATIO: 2\n  KEYPOINT_CONFIDENCE: bbox\nTRAIN:\n  WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl\n  DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival')\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl')\n  SCALES: (640, 672, 704, 736, 768, 800)\n  MAX_SIZE: 1333\n  BATCH_SIZE_PER_IM: 512\nTEST:\n  DATASETS: ('keypoints_coco_2014_minival',)\n  PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',)\n  PROPOSAL_LIMIT: 1000\n  SCALE: 800\n  MAX_SIZE: 1333\n  NMS: 0.5\n  
WEIGHTS: https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl\n\n  # -- Test time augmentation example -- #\n  BBOX_AUG:\n    ENABLED: True\n    SCORE_HEUR: AVG\n    COORD_HEUR: AVG\n    H_FLIP: True\n    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)\n    MAX_SIZE: 2000\n    SCALE_H_FLIP: True\n    SCALE_SIZE_DEP: False\n    AREA_TH_LO: 2500  # 50^2\n    AREA_TH_HI: 32400  # 180^2\n  KPS_AUG:\n    ENABLED: True\n    HEUR: HM_AVG\n    H_FLIP: True\n    SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)\n    MAX_SIZE: 2000\n    SCALE_H_FLIP: True\n    SCALE_SIZE_DEP: True\n    AREA_TH: 22500  # 150^2\n    ASPECT_RATIOS: ()\n    ASPECT_RATIO_H_FLIP: False\n  # -- Test time augmentation example -- #\n\nOUTPUT_DIR: .\n"
  },
  {
    "path": "demo/NOTICE",
    "content": "The demo images are licensed as United States government work:\nhttps://www.usa.gov/government-works\n\nThe image files were obtained on Jan 13, 2018 from the following\nURLs.\n\n16004479832_a748d55f21_k.jpg\nhttps://www.flickr.com/photos/archivesnews/16004479832\n\n18124840932_e42b3e377c_k.jpg\nhttps://www.flickr.com/photos/usnavy/18124840932\n\n33887522274_eebd074106_k.jpg\nhttps://www.flickr.com/photos/usaid_pakistan/33887522274\n\n15673749081_767a7fa63a_k.jpg\nhttps://www.flickr.com/photos/usnavy/15673749081\n\n34501842524_3c858b3080_k.jpg\nhttps://www.flickr.com/photos/departmentofenergy/34501842524\n\n24274813513_0cfd2ce6d0_k.jpg\nhttps://www.flickr.com/photos/dhsgov/24274813513\n\n19064748793_bb942deea1_k.jpg\nhttps://www.flickr.com/photos/statephotos/19064748793\n\n33823288584_1d21cf0a26_k.jpg\nhttps://www.flickr.com/photos/cbpphotos/33823288584\n\n17790319373_bd19b24cfc_k.jpg\nhttps://www.flickr.com/photos/secdef/17790319373\n"
  },
  {
    "path": "detectron/__init__.py",
    "content": ""
  },
  {
    "path": "detectron/core/__init__.py",
    "content": ""
  },
  {
    "path": "detectron/core/config.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Detectron config system.\n\nThis file specifies default config options for Detectron. You should not\nchange values in this file. Instead, you should write a config file (in yaml)\nand use merge_cfg_from_file(yaml_file) to load it and override the default\noptions.\n\nMost tools in the tools directory take a --cfg option to specify an override\nfile and an optional list of override (key, value) pairs:\n - See tools/{train,test}_net.py for example code that uses merge_cfg_from_file\n - See configs/*/*.yaml for example config files\n\nDetectron supports a lot of different model types, each of which has a lot of\ndifferent options. 
The result is a HUGE set of configuration options.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom ast import literal_eval\nfrom future.utils import iteritems\nimport copy\nimport io\nimport logging\nimport numpy as np\nimport os\nimport os.path as osp\nimport six\n\nfrom detectron.utils.collections import AttrDict\nfrom detectron.utils.io import cache_url\n\nlogger = logging.getLogger(__name__)\n\n__C = AttrDict()\n# Consumers can get config by:\n#   from detectron.core.config import cfg\ncfg = __C\n\n# Random note: avoid using '.ON' as a config key since yaml converts it to True;\n# prefer 'ENABLED' instead\n\n# ---------------------------------------------------------------------------- #\n# Training options\n# ---------------------------------------------------------------------------- #\n__C.TRAIN = AttrDict()\n\n# Initialize network with weights from this .pkl file\n__C.TRAIN.WEIGHTS = ''\n\n# Datasets to train on\n# Available dataset list: detectron.datasets.dataset_catalog.datasets()\n# If multiple datasets are listed, the model is trained on their union\n__C.TRAIN.DATASETS = ()\n\n# Scales to use during training\n# Each scale is the pixel size of an image's shortest side\n# If multiple scales are listed, then one is selected uniformly at random for\n# each training image (i.e., scale jitter data augmentation)\n__C.TRAIN.SCALES = (600, )\n\n# Max pixel size of the longest side of a scaled input image\n__C.TRAIN.MAX_SIZE = 1000\n\n# Images *per GPU* in the training minibatch\n# Total images per minibatch = TRAIN.IMS_PER_BATCH * NUM_GPUS\n__C.TRAIN.IMS_PER_BATCH = 2\n\n# RoI minibatch size *per image* (number of regions of interest [ROIs])\n# Total number of RoIs per training minibatch =\n#   TRAIN.BATCH_SIZE_PER_IM * TRAIN.IMS_PER_BATCH * NUM_GPUS\n# E.g., a common configuration is: 512 * 2 * 8 = 8192\n__C.TRAIN.BATCH_SIZE_PER_IM = 
64\n\n# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)\n__C.TRAIN.FG_FRACTION = 0.25\n\n# Overlap threshold for an RoI to be considered foreground (if >= FG_THRESH)\n__C.TRAIN.FG_THRESH = 0.5\n\n# Overlap threshold for an RoI to be considered background (class = 0 if\n# overlap in [LO, HI))\n__C.TRAIN.BG_THRESH_HI = 0.5\n__C.TRAIN.BG_THRESH_LO = 0.0\n\n# Use horizontally-flipped images during training?\n__C.TRAIN.USE_FLIPPED = True\n\n# Overlap required between an RoI and a ground-truth box in order for that\n# (RoI, gt box) pair to be used as a bounding-box regression training example\n__C.TRAIN.BBOX_THRESH = 0.5\n\n# Snapshot (model checkpoint) period\n# Divide by NUM_GPUS to determine actual period (e.g., 80000/8 => 10000 iters)\n# to allow for linear training schedule scaling\n__C.TRAIN.SNAPSHOT_ITERS = 80000\n\n# Train using these proposals\n# During training, all proposals specified in the file are used (no limit is\n# applied)\n# Proposal files must be in correspondence with the datasets listed in\n# TRAIN.DATASETS\n__C.TRAIN.PROPOSAL_FILES = ()\n\n# Make minibatches from images that have similar aspect ratios (i.e. 
both\n# tall and thin or both short and wide)\n# This feature is critical for saving memory (and makes training slightly\n# faster)\n__C.TRAIN.ASPECT_GROUPING = True\n\n# ---------------------------------------------------------------------------- #\n# RPN training options\n# ---------------------------------------------------------------------------- #\n\n# Run GenerateProposals on GPU if set to True\n__C.TRAIN.GENERATE_PROPOSALS_ON_GPU = False\n\n# Minimum overlap required between an anchor and ground-truth box for the\n# (anchor, gt box) pair to be a positive example (IOU >= thresh ==> positive RPN\n# example)\n__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7\n\n# Maximum overlap allowed between an anchor and ground-truth box for the\n# (anchor, gt box) pair to be a negative example (IOU < thresh ==> negative RPN\n# example)\n__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3\n\n# Target fraction of foreground (positive) examples per RPN minibatch\n__C.TRAIN.RPN_FG_FRACTION = 0.5\n\n# Total number of RPN examples per image\n__C.TRAIN.RPN_BATCH_SIZE_PER_IM = 256\n\n# NMS threshold used on RPN proposals (used during end-to-end training with RPN)\n__C.TRAIN.RPN_NMS_THRESH = 0.7\n\n# Number of top scoring RPN proposals to keep before applying NMS\n# When FPN is used, this is *per FPN level* (not total)\n__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000\n\n# Number of top scoring RPN proposals to keep after applying NMS\n# This is the total number of RPN proposals produced (for both FPN and non-FPN\n# cases)\n__C.TRAIN.RPN_POST_NMS_TOP_N = 2000\n\n# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels\n# Set to -1 or a large value, e.g. 
100000, to disable pruning anchors\n__C.TRAIN.RPN_STRADDLE_THRESH = 0\n\n# Proposal height and width both need to be greater than RPN_MIN_SIZE\n# (at orig image scale; not scale used during training or inference)\n__C.TRAIN.RPN_MIN_SIZE = 0\n\n# Filter proposals that are inside of crowd regions by CROWD_FILTER_THRESH\n# \"Inside\" is measured as: proposal-with-crowd intersection area divided by\n# proposal area\n__C.TRAIN.CROWD_FILTER_THRESH = 0.7\n\n# Ignore ground-truth objects with area < this threshold\n__C.TRAIN.GT_MIN_AREA = -1\n\n# Freeze the backbone architecture during training if set to True\n__C.TRAIN.FREEZE_CONV_BODY = False\n\n# Training will resume from the latest snapshot (model checkpoint) found in the\n# output directory\n__C.TRAIN.AUTO_RESUME = True\n\n# Training will copy TRAIN.WEIGHTS and treat it as a candidate checkpoint\n__C.TRAIN.COPY_WEIGHTS = False\n\n# Add StopGrad at a specified stage so the bottom layers are frozen\n__C.TRAIN.FREEZE_AT = 2\n\n\n# ---------------------------------------------------------------------------- #\n# Data loader options (see detectron/roi_data/loader.py for more info)\n# ---------------------------------------------------------------------------- #\n__C.DATA_LOADER = AttrDict()\n\n# Number of Python threads to use for the data loader (warning: using too many\n# threads can cause GIL-based interference with Python Ops leading to *slower*\n# training; 4 seems to be the sweet spot in our experience)\n__C.DATA_LOADER.NUM_THREADS = 4\n\n# Size of the shared minibatch queue\n__C.DATA_LOADER.MINIBATCH_QUEUE_SIZE = 64\n\n# Capacity of the per GPU blobs queue\n__C.DATA_LOADER.BLOBS_QUEUE_CAPACITY = 8\n\n\n# ---------------------------------------------------------------------------- #\n# Inference ('test') options\n# ---------------------------------------------------------------------------- #\n__C.TEST = AttrDict()\n\n# Initialize network with weights from this .pkl file\n__C.TEST.WEIGHTS = ''\n\n# Datasets to test 
on\n# Available dataset list: detectron.datasets.dataset_catalog.datasets()\n# If multiple datasets are listed, testing is performed on each one sequentially\n__C.TEST.DATASETS = ()\n\n# Scale to use during testing\n__C.TEST.SCALE = 600\n\n# Max pixel size of the longest side of a scaled input image\n__C.TEST.MAX_SIZE = 1000\n\n# Overlap threshold used for non-maximum suppression (suppress boxes with\n# IoU >= this threshold)\n__C.TEST.NMS = 0.3\n\n# Apply Fast R-CNN style bounding-box regression if True\n__C.TEST.BBOX_REG = True\n\n# Test using these proposal files (must correspond with TEST.DATASETS)\n__C.TEST.PROPOSAL_FILES = ()\n\n# Run GenerateProposals on GPU if set to True\n__C.TEST.GENERATE_PROPOSALS_ON_GPU = False\n\n# Limit on the number of proposals per image used during inference\n__C.TEST.PROPOSAL_LIMIT = 2000\n\n# NMS threshold used on RPN proposals\n__C.TEST.RPN_NMS_THRESH = 0.7\n\n# Number of top scoring RPN proposals to keep before applying NMS\n# When FPN is used, this is *per FPN level* (not total)\n__C.TEST.RPN_PRE_NMS_TOP_N = 12000\n\n# Number of top scoring RPN proposals to keep after applying NMS\n# This is the total number of RPN proposals produced (for both FPN and non-FPN\n# cases)\n__C.TEST.RPN_POST_NMS_TOP_N = 2000\n\n# Proposal height and width both need to be greater than RPN_MIN_SIZE\n# (at orig image scale; not scale used during training or inference)\n__C.TEST.RPN_MIN_SIZE = 0\n\n# Maximum number of detections to return per image (100 is based on the limit\n# established for the COCO dataset)\n__C.TEST.DETECTIONS_PER_IM = 100\n\n# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to\n# balance obtaining high recall with not having too many low precision\n# detections that will slow down inference post processing steps (like NMS)\n__C.TEST.SCORE_THRESH = 0.05\n\n# Save detection results files if True\n# If false, results files are cleaned up (they can be large) after local\n# 
evaluation\n__C.TEST.COMPETITION_MODE = True\n\n# Evaluate detections with the COCO json dataset eval code even if it's not the\n# evaluation code for the dataset (e.g. evaluate PASCAL VOC results using the\n# COCO API to get COCO style AP on PASCAL VOC)\n__C.TEST.FORCE_JSON_DATASET_EVAL = False\n\n# [Inferred value; do not set directly in a config]\n# Indicates if precomputed proposals are used at test time\n# Not set for 1-stage models and 2-stage models with RPN subnetwork enabled\n__C.TEST.PRECOMPUTED_PROPOSALS = True\n\n# Evaluate proposals in class-specific Average Recall (AR).\n# It means that one first computes AR within each category and then averages\n# over the categories. It is not biased towards the AR of frequent categories\n# compared with class-agnostic AR.\n__C.TEST.CLASS_SPECIFIC_AR = False\n\n# ---------------------------------------------------------------------------- #\n# Test-time augmentations for bounding box detection\n# See configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml for an example\n# ---------------------------------------------------------------------------- #\n__C.TEST.BBOX_AUG = AttrDict()\n\n# Enable test-time augmentation for bounding box detection if True\n__C.TEST.BBOX_AUG.ENABLED = False\n\n# Heuristic used to combine predicted box scores\n#   Valid options: ('ID', 'AVG', 'UNION')\n__C.TEST.BBOX_AUG.SCORE_HEUR = 'UNION'\n\n# Heuristic used to combine predicted box coordinates\n#   Valid options: ('ID', 'AVG', 'UNION')\n__C.TEST.BBOX_AUG.COORD_HEUR = 'UNION'\n\n# Horizontal flip at the original scale (id transform)\n__C.TEST.BBOX_AUG.H_FLIP = False\n\n# Each scale is the pixel size of an image's shortest side\n__C.TEST.BBOX_AUG.SCALES = ()\n\n# Max pixel size of the longer side\n__C.TEST.BBOX_AUG.MAX_SIZE = 4000\n\n# Horizontal flip at each scale\n__C.TEST.BBOX_AUG.SCALE_H_FLIP = False\n\n# Apply scaling based on object size\n__C.TEST.BBOX_AUG.SCALE_SIZE_DEP = False\n__C.TEST.BBOX_AUG.AREA_TH_LO = 
50**2\n__C.TEST.BBOX_AUG.AREA_TH_HI = 180**2\n\n# Each aspect ratio is relative to image width\n__C.TEST.BBOX_AUG.ASPECT_RATIOS = ()\n\n# Horizontal flip at each aspect ratio\n__C.TEST.BBOX_AUG.ASPECT_RATIO_H_FLIP = False\n\n# ---------------------------------------------------------------------------- #\n# Test-time augmentations for mask detection\n# See configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml for an example\n# ---------------------------------------------------------------------------- #\n__C.TEST.MASK_AUG = AttrDict()\n\n# Enable test-time augmentation for instance mask detection if True\n__C.TEST.MASK_AUG.ENABLED = False\n\n# Heuristic used to combine mask predictions\n# SOFT prefix indicates that the computation is performed on soft masks\n#   Valid options: ('SOFT_AVG', 'SOFT_MAX', 'LOGIT_AVG')\n__C.TEST.MASK_AUG.HEUR = 'SOFT_AVG'\n\n# Horizontal flip at the original scale (id transform)\n__C.TEST.MASK_AUG.H_FLIP = False\n\n# Each scale is the pixel size of an image's shortest side\n__C.TEST.MASK_AUG.SCALES = ()\n\n# Max pixel size of the longer side\n__C.TEST.MASK_AUG.MAX_SIZE = 4000\n\n# Horizontal flip at each scale\n__C.TEST.MASK_AUG.SCALE_H_FLIP = False\n\n# Apply scaling based on object size\n__C.TEST.MASK_AUG.SCALE_SIZE_DEP = False\n__C.TEST.MASK_AUG.AREA_TH = 180**2\n\n# Each aspect ratio is relative to image width\n__C.TEST.MASK_AUG.ASPECT_RATIOS = ()\n\n# Horizontal flip at each aspect ratio\n__C.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP = False\n\n# ---------------------------------------------------------------------------- #\n# Test-time augmentations for keypoint detection\n# See configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml for an example\n# ---------------------------------------------------------------------------- #\n__C.TEST.KPS_AUG = AttrDict()\n\n# Enable test-time augmentation for keypoint detection if True\n__C.TEST.KPS_AUG.ENABLED = False\n\n# Heuristic used to combine keypoint predictions\n#   Valid options: ('HM_AVG', 
'HM_MAX')\n__C.TEST.KPS_AUG.HEUR = 'HM_AVG'\n\n# Horizontal flip at the original scale (id transform)\n__C.TEST.KPS_AUG.H_FLIP = False\n\n# Each scale is the pixel size of an image's shortest side\n__C.TEST.KPS_AUG.SCALES = ()\n\n# Max pixel size of the longer side\n__C.TEST.KPS_AUG.MAX_SIZE = 4000\n\n# Horizontal flip at each scale\n__C.TEST.KPS_AUG.SCALE_H_FLIP = False\n\n# Apply scaling based on object size\n__C.TEST.KPS_AUG.SCALE_SIZE_DEP = False\n__C.TEST.KPS_AUG.AREA_TH = 180**2\n\n# Each aspect ratio is relative to image width\n__C.TEST.KPS_AUG.ASPECT_RATIOS = ()\n\n# Horizontal flip at each aspect ratio\n__C.TEST.KPS_AUG.ASPECT_RATIO_H_FLIP = False\n\n# ---------------------------------------------------------------------------- #\n# Soft NMS\n# ---------------------------------------------------------------------------- #\n__C.TEST.SOFT_NMS = AttrDict()\n\n# Use soft NMS instead of standard NMS if set to True\n__C.TEST.SOFT_NMS.ENABLED = False\n# See soft NMS paper for definition of these options\n__C.TEST.SOFT_NMS.METHOD = 'linear'\n__C.TEST.SOFT_NMS.SIGMA = 0.5\n# For the soft NMS overlap threshold, we simply use TEST.NMS\n\n# ---------------------------------------------------------------------------- #\n# Bounding box voting (from the Multi-Region CNN paper)\n# ---------------------------------------------------------------------------- #\n__C.TEST.BBOX_VOTE = AttrDict()\n\n# Use box voting if set to True\n__C.TEST.BBOX_VOTE.ENABLED = False\n\n# We use TEST.NMS threshold for the NMS step. 
VOTE_TH overlap threshold\n# is used to select voting boxes (IoU >= VOTE_TH) for each box that survives NMS\n__C.TEST.BBOX_VOTE.VOTE_TH = 0.8\n\n# The method used to combine scores when doing bounding box voting\n# Valid options include ('ID', 'AVG', 'IOU_AVG', 'GENERALIZED_AVG', 'QUASI_SUM')\n__C.TEST.BBOX_VOTE.SCORING_METHOD = 'ID'\n\n# Hyperparameter used by the scoring method (it has different meanings for\n# different methods)\n__C.TEST.BBOX_VOTE.SCORING_METHOD_BETA = 1.0\n\n\n# ---------------------------------------------------------------------------- #\n# Model options\n# ---------------------------------------------------------------------------- #\n__C.MODEL = AttrDict()\n\n# The type of model to use\n# The string must match a function in the modeling.model_builder module\n# (e.g., 'generalized_rcnn', 'mask_rcnn', ...)\n__C.MODEL.TYPE = ''\n\n# The backbone conv body to use\n# The string must match a function that is imported in modeling.model_builder\n# (e.g., 'FPN.add_fpn_ResNet101_conv5_body' to specify a ResNet-101-FPN\n# backbone)\n__C.MODEL.CONV_BODY = ''\n\n# Number of classes in the dataset; must be set\n# E.g., 81 for COCO (80 foreground + 1 background)\n__C.MODEL.NUM_CLASSES = -1\n\n# Use a class agnostic bounding box regressor instead of the default per-class\n# regressor\n__C.MODEL.CLS_AGNOSTIC_BBOX_REG = False\n\n# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets\n# These are empirically chosen to approximately lead to unit variance targets\n__C.MODEL.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)\n\n# The meaning of FASTER_RCNN depends on the context (training vs. 
inference):\n# 1) During training, FASTER_RCNN = True means that end-to-end training will be\n#    used to jointly train the RPN subnetwork and the Fast R-CNN subnetwork\n#    (Faster R-CNN = RPN + Fast R-CNN).\n# 2) During inference, FASTER_RCNN = True means that the model's RPN subnetwork\n#    will be used to generate proposals rather than relying on precomputed\n#    proposals. Note that FASTER_RCNN = True can be used at inference time even\n#    if the Faster R-CNN model was trained with stagewise training (which\n#    consists of alternating between RPN and Fast R-CNN training in a way that\n#    finally leads to a single network).\n__C.MODEL.FASTER_RCNN = False\n\n# Indicates the model makes instance mask predictions (as in Mask R-CNN)\n__C.MODEL.MASK_ON = False\n\n# Indicates the model makes keypoint predictions (as in Mask R-CNN for\n# keypoints)\n__C.MODEL.KEYPOINTS_ON = False\n\n# Indicates the model's computation terminates with the production of RPN\n# proposals (i.e., it outputs proposals ONLY, no actual object detections)\n__C.MODEL.RPN_ONLY = False\n\n# Caffe2 net execution type\n# Use 'prof_dag' to get profiling statistics\n__C.MODEL.EXECUTION_TYPE = 'dag'\n\n\n# ---------------------------------------------------------------------------- #\n# RetinaNet options\n# ---------------------------------------------------------------------------- #\n__C.RETINANET = AttrDict()\n\n# RetinaNet is used (instead of Fast/er/Mask R-CNN/R-FCN/RPN) if True\n__C.RETINANET.RETINANET_ON = False\n\n# Anchor aspect ratios to use\n__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)\n\n# Anchor scales per octave\n__C.RETINANET.SCALES_PER_OCTAVE = 3\n\n# At each FPN level, we generate anchors based on their scale, aspect_ratio,\n# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE\n__C.RETINANET.ANCHOR_SCALE = 4\n\n# Convolutions to use in the cls and bbox tower\n# NOTE: this doesn't include the last conv for logits\n__C.RETINANET.NUM_CONVS = 4\n\n# 
Weight for bbox_regression loss\n__C.RETINANET.BBOX_REG_WEIGHT = 1.0\n\n# Smooth L1 loss beta for bbox regression\n__C.RETINANET.BBOX_REG_BETA = 0.11\n\n# During inference, #locs to select based on cls score before NMS is performed\n# per FPN level\n__C.RETINANET.PRE_NMS_TOP_N = 1000\n\n# IoU overlap ratio for labeling an anchor as positive\n# Anchors with >= iou overlap are labeled positive\n__C.RETINANET.POSITIVE_OVERLAP = 0.5\n\n# IoU overlap ratio for labeling an anchor as negative\n# Anchors with < iou overlap are labeled negative\n__C.RETINANET.NEGATIVE_OVERLAP = 0.4\n\n# Focal loss parameter: alpha\n__C.RETINANET.LOSS_ALPHA = 0.25\n\n# Focal loss parameter: gamma\n__C.RETINANET.LOSS_GAMMA = 2.0\n\n# Prior prob for the positives at the beginning of training. This is used to set\n# the bias init for the logits layer\n__C.RETINANET.PRIOR_PROB = 0.01\n\n# Whether classification and bbox branch tower should be shared or not\n__C.RETINANET.SHARE_CLS_BBOX_TOWER = False\n\n# Use class specific bounding box regression instead of the default class\n# agnostic regression\n__C.RETINANET.CLASS_SPECIFIC_BBOX = False\n\n# Whether softmax should be used in classification branch training\n__C.RETINANET.SOFTMAX = False\n\n# Inference cls score threshold, anchors with score > INFERENCE_TH are\n# considered for inference\n__C.RETINANET.INFERENCE_TH = 0.05\n\n\n# ---------------------------------------------------------------------------- #\n# Solver options\n# Note: all solver options are used exactly as specified; the implication is\n# that if you switch from training on 1 GPU to N GPUs, you MUST adjust the\n# solver configuration accordingly. 
We suggest using gradual warmup and the\n# linear learning rate scaling rule as described in\n# \"Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour\" Goyal et al.\n# https://arxiv.org/abs/1706.02677\n# ---------------------------------------------------------------------------- #\n__C.SOLVER = AttrDict()\n\n# Base learning rate for the specified schedule\n__C.SOLVER.BASE_LR = 0.001\n\n# Schedule type (see functions in utils.lr_policy for options)\n# E.g., 'step', 'steps_with_decay', ...\n__C.SOLVER.LR_POLICY = 'step'\n\n# Some LR Policies (by example):\n# 'step'\n#   lr = SOLVER.BASE_LR * SOLVER.GAMMA ** (cur_iter // SOLVER.STEP_SIZE)\n# 'steps_with_decay'\n#   SOLVER.STEPS = [0, 60000, 80000]\n#   SOLVER.GAMMA = 0.1\n#   lr = SOLVER.BASE_LR * SOLVER.GAMMA ** current_step\n#   iters [0, 59999] are in current_step = 0, iters [60000, 79999] are in\n#   current_step = 1, and so on\n# 'steps_with_lrs'\n#   SOLVER.STEPS = [0, 60000, 80000]\n#   SOLVER.LRS = [0.02, 0.002, 0.0002]\n#   lr = LRS[current_step]\n# 'cosine_decay'\n#   lr = SOLVER.BASE_LR * (cos(PI * cur_iter / SOLVER.MAX_ITER) * 0.5 + 0.5)\n# 'exp_decay'\n#   lr smoothly decays from SOLVER.BASE_LR to SOLVER.GAMMA * SOLVER.BASE_LR\n#   lr = SOLVER.BASE_LR * exp(np.log(SOLVER.GAMMA) * cur_iter / SOLVER.MAX_ITER)\n\n# Hyperparameter used by the specified policy\n# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step\n# For 'exp_decay', SOLVER.GAMMA is the ratio between the final and initial LR.\n__C.SOLVER.GAMMA = 0.1\n\n# Uniform step size for 'steps' policy\n__C.SOLVER.STEP_SIZE = 30000\n\n# Non-uniform step iterations for 'steps_with_decay' or 'steps_with_lrs'\n# policies\n__C.SOLVER.STEPS = []\n\n# Learning rates to use with 'steps_with_lrs' policy\n__C.SOLVER.LRS = []\n\n# Maximum number of SGD iterations\n__C.SOLVER.MAX_ITER = 40000\n\n# Momentum to use with SGD\n__C.SOLVER.MOMENTUM = 0.9\n\n# L2 regularization hyperparameter\n__C.SOLVER.WEIGHT_DECAY = 0.0005\n# L2 regularization 
hyperparameter for GroupNorm's parameters\n__C.SOLVER.WEIGHT_DECAY_GN = 0.0\n\n# Warm up to SOLVER.BASE_LR over this number of SGD iterations\n__C.SOLVER.WARM_UP_ITERS = 500\n\n# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR\n__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0\n\n# WARM_UP_METHOD can be either 'constant' or 'linear' (i.e., gradual)\n__C.SOLVER.WARM_UP_METHOD = 'linear'\n\n# Scale the momentum update history by new_lr / old_lr when updating the\n# learning rate (this is correct given MomentumSGDUpdateOp)\n__C.SOLVER.SCALE_MOMENTUM = True\n# Only apply the correction if the relative LR change exceeds this threshold\n# (prevents every change in linear warm up from scaling the momentum by a tiny\n# amount; momentum scaling is only important if the LR change is large)\n__C.SOLVER.SCALE_MOMENTUM_THRESHOLD = 1.1\n\n# Suppress logging of changes to LR unless the relative change exceeds this\n# threshold (prevents linear warm up from spamming the training log)\n__C.SOLVER.LOG_LR_CHANGE_THRESHOLD = 1.1\n\n\n# ---------------------------------------------------------------------------- #\n# Fast R-CNN options\n# ---------------------------------------------------------------------------- #\n__C.FAST_RCNN = AttrDict()\n\n# The type of RoI head to use for bounding box classification and regression\n# The string must match a function that is imported in modeling.model_builder\n# (e.g., 'head_builder.add_roi_2mlp_head' to specify a two hidden layer MLP)\n__C.FAST_RCNN.ROI_BOX_HEAD = ''\n\n# Hidden layer dimension when using an MLP for the RoI box head\n__C.FAST_RCNN.MLP_HEAD_DIM = 1024\n\n# Hidden Conv layer dimension when using Convs for the RoI box head\n__C.FAST_RCNN.CONV_HEAD_DIM = 256\n# Number of stacked Conv layers in the RoI box head\n__C.FAST_RCNN.NUM_STACKED_CONVS = 4\n\n# RoI transformation function (e.g., RoIPool or RoIAlign)\n# (RoIPoolF is the same as RoIPool; ignore the trailing 'F')\n__C.FAST_RCNN.ROI_XFORM_METHOD = 'RoIPoolF'\n\n# Number of grid 
sampling points in RoIAlign (usually use 2)\n# Only applies to RoIAlign\n__C.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO = 0\n\n# RoI transform output resolution\n# Note: some models may have constraints on what they can use, e.g. they use\n# pretrained FC layers like in VGG16, and will ignore this option\n__C.FAST_RCNN.ROI_XFORM_RESOLUTION = 14\n\n\n# ---------------------------------------------------------------------------- #\n# RPN options\n# ---------------------------------------------------------------------------- #\n__C.RPN = AttrDict()\n\n# [Inferred value; do not set directly in a config]\n# Indicates that the model contains an RPN subnetwork\n__C.RPN.RPN_ON = False\n\n# RPN anchor sizes given in absolute pixels w.r.t. the scaled network input\n# Note: these options are *not* used by FPN RPN; see FPN.RPN* options\n__C.RPN.SIZES = (64, 128, 256, 512)\n\n# Stride of the feature map that RPN is attached to\n__C.RPN.STRIDE = 16\n\n# RPN anchor aspect ratios\n__C.RPN.ASPECT_RATIOS = (0.5, 1, 2)\n\n\n# ---------------------------------------------------------------------------- #\n# FPN options\n# ---------------------------------------------------------------------------- #\n__C.FPN = AttrDict()\n\n# FPN is enabled if True\n__C.FPN.FPN_ON = False\n\n# Channel dimension of the FPN feature levels\n__C.FPN.DIM = 256\n\n# Initialize the lateral connections to output zero if True\n__C.FPN.ZERO_INIT_LATERAL = False\n\n# Stride of the coarsest FPN level\n# This is needed so the input can be padded properly\n__C.FPN.COARSEST_STRIDE = 32\n\n#\n# FPN may be used for just RPN, just object detection, or both\n#\n\n# Use FPN for RoI transform for object detection if True\n__C.FPN.MULTILEVEL_ROIS = False\n# Hyperparameters for the RoI-to-FPN level mapping heuristic\n__C.FPN.ROI_CANONICAL_SCALE = 224  # s0\n__C.FPN.ROI_CANONICAL_LEVEL = 4  # k0: where s0 maps to\n# Coarsest level of the FPN pyramid\n__C.FPN.ROI_MAX_LEVEL = 5\n# Finest level of the FPN pyramid\n__C.FPN.ROI_MIN_LEVEL = 
2\n\n# Use FPN for RPN if True\n__C.FPN.MULTILEVEL_RPN = False\n# Coarsest level of the FPN pyramid\n__C.FPN.RPN_MAX_LEVEL = 6\n# Finest level of the FPN pyramid\n__C.FPN.RPN_MIN_LEVEL = 2\n# FPN RPN anchor aspect ratios\n__C.FPN.RPN_ASPECT_RATIOS = (0.5, 1, 2)\n# RPN anchors start at this size on RPN_MIN_LEVEL\n# The anchor size doubles each level after that\n# With a default of 32 and levels 2 to 6, we get anchor sizes of 32 to 512\n__C.FPN.RPN_ANCHOR_START_SIZE = 32\n# Use extra FPN levels, as done in the RetinaNet paper\n__C.FPN.EXTRA_CONV_LEVELS = False\n# Use GroupNorm in the FPN-specific layers (lateral, etc.)\n__C.FPN.USE_GN = False\n\n\n# ---------------------------------------------------------------------------- #\n# Mask R-CNN options (\"MRCNN\" means Mask R-CNN)\n# ---------------------------------------------------------------------------- #\n__C.MRCNN = AttrDict()\n\n# The type of RoI head to use for instance mask prediction\n# The string must match a function that is imported in modeling.model_builder\n# (e.g., 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs')\n__C.MRCNN.ROI_MASK_HEAD = ''\n\n# Resolution of mask predictions\n__C.MRCNN.RESOLUTION = 14\n\n# RoI transformation function (e.g., RoIPool or RoIAlign)\n__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'\n\n# RoI transform output resolution\n__C.MRCNN.ROI_XFORM_RESOLUTION = 7\n\n# Number of grid sampling points in RoIAlign (usually use 2)\n# Only applies to RoIAlign\n__C.MRCNN.ROI_XFORM_SAMPLING_RATIO = 0\n\n# Number of channels in the mask head\n__C.MRCNN.DIM_REDUCED = 256\n\n# Use dilated convolution in the mask head\n__C.MRCNN.DILATION = 2\n\n# Upsample the predicted masks by this factor\n__C.MRCNN.UPSAMPLE_RATIO = 1\n\n# Use a fully-connected layer to predict the final masks instead of a conv layer\n__C.MRCNN.USE_FC_OUTPUT = False\n\n# Weight initialization method for the mask head and mask output layers\n__C.MRCNN.CONV_INIT = 'GaussianFill'\n\n# Use class specific mask 
predictions if True (otherwise use class agnostic mask\n# predictions)\n__C.MRCNN.CLS_SPECIFIC_MASK = True\n\n# Multi-task loss weight for masks\n__C.MRCNN.WEIGHT_LOSS_MASK = 1.0\n\n# Binarization threshold for converting soft masks to hard masks\n__C.MRCNN.THRESH_BINARIZE = 0.5\n\n\n# ---------------------------------------------------------------------------- #\n# Keypoint Mask R-CNN options (\"KRCNN\" = Mask R-CNN with Keypoint support)\n# ---------------------------------------------------------------------------- #\n__C.KRCNN = AttrDict()\n\n# The type of RoI head to use for instance keypoint prediction\n# The string must match a function this is imported in modeling.model_builder\n# (e.g., 'keypoint_rcnn_heads.add_roi_pose_head_v1convX')\n__C.KRCNN.ROI_KEYPOINTS_HEAD = ''\n\n# Output size (and size loss is computed on), e.g., 56x56\n__C.KRCNN.HEATMAP_SIZE = -1\n\n# Use bilinear interpolation to upsample the final heatmap by this factor\n__C.KRCNN.UP_SCALE = -1\n\n# Apply a ConvTranspose layer to the hidden representation computed by the\n# keypoint head prior to predicting the per-keypoint heatmaps\n__C.KRCNN.USE_DECONV = False\n# Channel dimension of the hidden representation produced by the ConvTranspose\n__C.KRCNN.DECONV_DIM = 256\n\n# Use a ConvTranspose layer to predict the per-keypoint heatmaps\n__C.KRCNN.USE_DECONV_OUTPUT = False\n\n# Use dilation in the keypoint head\n__C.KRCNN.DILATION = 1\n\n# Size of the kernels to use in all ConvTranspose operations\n__C.KRCNN.DECONV_KERNEL = 4\n\n# Number of keypoints in the dataset (e.g., 17 for COCO)\n__C.KRCNN.NUM_KEYPOINTS = -1\n\n# Number of stacked Conv layers in keypoint head\n__C.KRCNN.NUM_STACKED_CONVS = 8\n\n# Dimension of the hidden representation output by the keypoint head\n__C.KRCNN.CONV_HEAD_DIM = 256\n\n# Conv kernel size used in the keypoint head\n__C.KRCNN.CONV_HEAD_KERNEL = 3\n# Conv kernel weight filling function\n__C.KRCNN.CONV_INIT = 'GaussianFill'\n\n# Use NMS based on OKS if 
True\n__C.KRCNN.NMS_OKS = False\n\n# Source of keypoint confidence\n#   Valid options: ('bbox', 'logit', 'prob')\n__C.KRCNN.KEYPOINT_CONFIDENCE = 'bbox'\n\n# Standard ROI XFORM options (see FAST_RCNN or MRCNN options)\n__C.KRCNN.ROI_XFORM_METHOD = 'RoIAlign'\n__C.KRCNN.ROI_XFORM_RESOLUTION = 7\n__C.KRCNN.ROI_XFORM_SAMPLING_RATIO = 0\n\n# Minimum number of labeled keypoints that must exist in a minibatch (otherwise\n# the minibatch is discarded)\n__C.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH = 20\n\n# When inferring the keypoint locations from the heatmap, don't scale the\n# heatmap below this minimum size\n__C.KRCNN.INFERENCE_MIN_SIZE = 0\n\n# Multi-task loss weight to use for keypoints\n# Recommended values:\n#   - use 1.0 if KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is True\n#   - use 4.0 if KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False\n__C.KRCNN.LOSS_WEIGHT = 1.0\n\n# Normalize by the total number of visible keypoints in the minibatch if True.\n# Otherwise, normalize by the total number of keypoints that could ever exist\n# in the minibatch. 
See comments in modeling.model_builder.add_keypoint_losses\n# for detailed discussion.\n__C.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS = True\n\n\n# ---------------------------------------------------------------------------- #\n# R-FCN options\n# ---------------------------------------------------------------------------- #\n__C.RFCN = AttrDict()\n\n# Position-sensitive RoI pooling output grid size (height and width)\n__C.RFCN.PS_GRID_SIZE = 3\n\n\n# ---------------------------------------------------------------------------- #\n# ResNets options (\"ResNets\" = ResNet and ResNeXt)\n# ---------------------------------------------------------------------------- #\n__C.RESNETS = AttrDict()\n\n# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt\n__C.RESNETS.NUM_GROUPS = 1\n\n# Baseline width of each group\n__C.RESNETS.WIDTH_PER_GROUP = 64\n\n# Place the stride 2 conv on the 1x1 filter\n# Use True only for the original MSRA ResNet; use False for C2 and Torch models\n__C.RESNETS.STRIDE_1X1 = True\n\n# Residual transformation function\n__C.RESNETS.TRANS_FUNC = 'bottleneck_transformation'\n# ResNet's stem function (conv1 and pool1)\n__C.RESNETS.STEM_FUNC = 'basic_bn_stem'\n# ResNet's shortcut function\n__C.RESNETS.SHORTCUT_FUNC = 'basic_bn_shortcut'\n\n# Apply dilation in stage \"res5\"\n__C.RESNETS.RES5_DILATION = 1\n\n\n# ---------------------------------------------------------------------------- #\n# GroupNorm options\n# ---------------------------------------------------------------------------- #\n__C.GROUP_NORM = AttrDict()\n# Number of dimensions per group in GroupNorm (-1 if using NUM_GROUPS)\n__C.GROUP_NORM.DIM_PER_GP = -1\n# Number of groups in GroupNorm (-1 if using DIM_PER_GP)\n__C.GROUP_NORM.NUM_GROUPS = 32\n# GroupNorm's small constant in the denominator\n__C.GROUP_NORM.EPSILON = 1e-5\n\n\n# ---------------------------------------------------------------------------- #\n# Misc options\n# 
---------------------------------------------------------------------------- #\n\n# Number of GPUs to use (applies to both training and testing)\n__C.NUM_GPUS = 1\n\n# Use NCCL for all reduce, otherwise use muji\n# Warning: if set to True, you may experience deadlocks\n__C.USE_NCCL = False\n\n# The mapping from image coordinates to feature map coordinates might cause\n# some boxes that are distinct in image space to become identical in feature\n# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor\n# for identifying duplicate boxes.\n# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16\n__C.DEDUP_BOXES = 1 / 16.\n\n# Clip bounding box transformation predictions to prevent np.exp from\n# overflowing\n# Heuristic choice: the clip value corresponds to scaling a 16 pixel anchor up\n# to 1000 pixels\n__C.BBOX_XFORM_CLIP = np.log(1000. / 16.)\n\n# Pixel mean values (BGR order) as a (1, 1, 3) array\n# We use the same pixel mean for all networks even though it's not exactly what\n# they were trained with\n# \"Fun\" fact: the history of where these values come from is lost\n__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])\n\n# For reproducibility...but not really because modern fast GPU libraries use\n# non-deterministic op implementations\n__C.RNG_SEED = 3\n\n# A small number that's used many times\n__C.EPS = 1e-14\n\n# Root directory of project\n__C.ROOT_DIR = os.getcwd()\n\n# Output basedir\n__C.OUTPUT_DIR = '/tmp'\n\n# Name (or path to) the matlab executable\n__C.MATLAB = 'matlab'\n\n# Reduce memory usage with memonger gradient blob sharing\n__C.MEMONGER = True\n\n# Further reduce memory by allowing forward pass activations to be shared when\n# possible. Note that this will cause activation blob inspection (values,\n# shapes, etc.) 
to be meaningless when activation blobs are reused.\n__C.MEMONGER_SHARE_ACTIVATIONS = False\n\n# Dump detection visualizations\n__C.VIS = False\n\n# Score threshold for visualization\n__C.VIS_TH = 0.9\n\n# Expected results should take the form of a list of expectations, each\n# specified by four elements (dataset, task, metric, expected value). For\n# example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387]]\n__C.EXPECTED_RESULTS = []\n# Absolute and relative tolerance to use when comparing to EXPECTED_RESULTS\n__C.EXPECTED_RESULTS_RTOL = 0.1\n__C.EXPECTED_RESULTS_ATOL = 0.005\n# When the expected value specifies a mean and standard deviation, we check\n# that the actual value is within mean +/- SIGMA_TOL * std\n__C.EXPECTED_RESULTS_SIGMA_TOL = 4\n# Set to send email in case of an EXPECTED_RESULTS failure\n__C.EXPECTED_RESULTS_EMAIL = ''\n\n# Models and proposals referred to by URL are downloaded to a local cache\n# specified by DOWNLOAD_CACHE\n__C.DOWNLOAD_CACHE = '/tmp/detectron-download-cache'\n\n\n# ---------------------------------------------------------------------------- #\n# Cluster options\n# ---------------------------------------------------------------------------- #\n__C.CLUSTER = AttrDict()\n\n# Flag to indicate if the code is running in a cluster environment\n__C.CLUSTER.ON_CLUSTER = False\n\n\n# ---------------------------------------------------------------------------- #\n# Deprecated options\n# If an option is removed from the code and you don't want to break existing\n# yaml configs, you can add the full config key as a string to the set below.\n# ---------------------------------------------------------------------------- #\n_DEPRECATED_KEYS = set(\n    {\n        'FINAL_MSG',\n        'MODEL.DILATION',\n        'ROOT_GPU_ID',\n        'RPN.ON',\n        'TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED',\n        'TRAIN.DROPOUT',\n        'USE_GPU_NMS',\n        'TEST.NUM_TEST_IMAGES',\n    }\n)\n\n\n# 
---------------------------------------------------------------------------- #\n# Renamed options\n# If you rename a config option, record the mapping from the old name to the new\n# name in the dictionary below. Optionally, if the type also changed, you can\n# make the value a tuple that specifies first the renamed key and then\n# instructions for how to edit the config file.\n# ---------------------------------------------------------------------------- #\n_RENAMED_KEYS = {\n    'EXAMPLE.RENAMED.KEY': 'EXAMPLE.KEY',  # Dummy example to follow\n    'MODEL.PS_GRID_SIZE': 'RFCN.PS_GRID_SIZE',\n    'MODEL.ROI_HEAD': 'FAST_RCNN.ROI_BOX_HEAD',\n    'MRCNN.MASK_HEAD_NAME': 'MRCNN.ROI_MASK_HEAD',\n    'TRAIN.DATASET': (\n        'TRAIN.DATASETS',\n        \"Also convert to a tuple, e.g., \" +\n        \"'coco_2014_train' -> ('coco_2014_train',) or \" +\n        \"'coco_2014_train:coco_2014_valminusminival' -> \" +\n        \"('coco_2014_train', 'coco_2014_valminusminival')\"\n    ),\n    'TRAIN.PROPOSAL_FILE': (\n        'TRAIN.PROPOSAL_FILES',\n        \"Also convert to a tuple, e.g., \" +\n        \"'path/to/file' -> ('path/to/file',) or \" +\n        \"'path/to/file1:path/to/file2' -> \" +\n        \"('path/to/file1', 'path/to/file2')\"\n    ),\n    'TEST.SCALES': (\n        'TEST.SCALE',\n        \"Also convert from a tuple, e.g. (600, ), \" +\n        \"to a integer, e.g. 600.\"\n    ),\n    'TEST.DATASET': (\n        'TEST.DATASETS',\n        \"Also convert from a string, e.g 'coco_2014_minival', \" +\n        \"to a tuple, e.g. ('coco_2014_minival', ).\"\n    ),\n    'TEST.PROPOSAL_FILE': (\n        'TEST.PROPOSAL_FILES',\n        \"Also convert from a string, e.g. '/path/to/props.pkl', \" +\n        \"to a tuple, e.g. ('/path/to/props.pkl', ).\"\n    ),\n}\n\n\n# ---------------------------------------------------------------------------- #\n# Renamed modules\n# If a module containing a data structure used in the config (e.g. 
AttrDict)\n# is renamed/moved and you don't want to break loading of existing yaml configs\n# (e.g. from weights files) you can specify the renamed module below.\n# ---------------------------------------------------------------------------- #\n_RENAMED_MODULES = {\n    'utils.collections': 'detectron.utils.collections',\n}\n\n\ndef assert_and_infer_cfg(cache_urls=True, make_immutable=True):\n    \"\"\"Call this function in your script after you have finished setting all cfg\n    values that are necessary (e.g., merging a config from a file, merging\n    command line config options, etc.). By default, this function will also\n    mark the global cfg as immutable to prevent changing the global cfg settings\n    during script execution (which can lead to hard to debug errors or code\n    that's harder to understand than is necessary).\n    \"\"\"\n    if __C.MODEL.RPN_ONLY or __C.MODEL.FASTER_RCNN:\n        __C.RPN.RPN_ON = True\n    if __C.RPN.RPN_ON or __C.RETINANET.RETINANET_ON:\n        __C.TEST.PRECOMPUTED_PROPOSALS = False\n    if cache_urls:\n        cache_cfg_urls()\n    if make_immutable:\n        cfg.immutable(True)\n\n\ndef cache_cfg_urls():\n    \"\"\"Download URLs in the config, cache them locally, and rewrite cfg to make\n    use of the locally cached file.\n    \"\"\"\n    __C.TRAIN.WEIGHTS = cache_url(__C.TRAIN.WEIGHTS, __C.DOWNLOAD_CACHE)\n    __C.TEST.WEIGHTS = cache_url(__C.TEST.WEIGHTS, __C.DOWNLOAD_CACHE)\n    __C.TRAIN.PROPOSAL_FILES = tuple(\n        cache_url(f, __C.DOWNLOAD_CACHE) for f in __C.TRAIN.PROPOSAL_FILES\n    )\n    __C.TEST.PROPOSAL_FILES = tuple(\n        cache_url(f, __C.DOWNLOAD_CACHE) for f in __C.TEST.PROPOSAL_FILES\n    )\n\n\ndef get_output_dir(datasets, training=True):\n    \"\"\"Get the output directory determined by the current global config.\"\"\"\n    assert isinstance(datasets, tuple([tuple, list] + list(six.string_types))), \\\n        'datasets argument must be of type tuple, list or string'\n    is_string = 
isinstance(datasets, six.string_types)\n    dataset_name = datasets if is_string else ':'.join(datasets)\n    tag = 'train' if training else 'test'\n    # <output-dir>/<train|test>/<dataset-name>/<model-type>/\n    outdir = osp.join(__C.OUTPUT_DIR, tag, dataset_name, __C.MODEL.TYPE)\n    if not osp.exists(outdir):\n        os.makedirs(outdir)\n    return outdir\n\n\ndef load_cfg(cfg_to_load):\n    \"\"\"Wrapper around yaml.load used for maintaining backward compatibility\"\"\"\n    file_types = [file, io.IOBase] if six.PY2 else [io.IOBase]  # noqa false positive\n    expected_types = tuple(file_types + list(six.string_types))\n    assert isinstance(cfg_to_load, expected_types), \\\n        'Expected one of {}, got {}'.format(expected_types, type(cfg_to_load))\n    if isinstance(cfg_to_load, tuple(file_types)):\n        cfg_to_load = ''.join(cfg_to_load.readlines())\n    for old_module, new_module in iteritems(_RENAMED_MODULES):\n        # yaml object encoding: !!python/object/new:<module>.<object>\n        old_module, new_module = 'new:' + old_module, 'new:' + new_module\n        cfg_to_load = cfg_to_load.replace(old_module, new_module)\n    # Import inline due to a circular dependency between env.py and config.py\n    import detectron.utils.env as envu\n    return envu.yaml_load(cfg_to_load)\n\n\ndef merge_cfg_from_file(cfg_filename):\n    \"\"\"Load a yaml config file and merge it into the global config.\"\"\"\n    with open(cfg_filename, 'r') as f:\n        yaml_cfg = AttrDict(load_cfg(f))\n    _merge_a_into_b(yaml_cfg, __C)\n\n\ndef merge_cfg_from_cfg(cfg_other):\n    \"\"\"Merge `cfg_other` into the global config.\"\"\"\n    _merge_a_into_b(cfg_other, __C)\n\n\ndef merge_cfg_from_list(cfg_list):\n    \"\"\"Merge config keys, values in a list (e.g., from command line) into the\n    global config. 
For example, `cfg_list = ['TEST.NMS', 0.5]`.\n    \"\"\"\n    assert len(cfg_list) % 2 == 0\n    for full_key, v in zip(cfg_list[0::2], cfg_list[1::2]):\n        if _key_is_deprecated(full_key):\n            continue\n        if _key_is_renamed(full_key):\n            _raise_key_rename_error(full_key)\n        key_list = full_key.split('.')\n        d = __C\n        for subkey in key_list[:-1]:\n            assert subkey in d, 'Non-existent key: {}'.format(full_key)\n            d = d[subkey]\n        subkey = key_list[-1]\n        assert subkey in d, 'Non-existent key: {}'.format(full_key)\n        value = _decode_cfg_value(v)\n        value = _check_and_coerce_cfg_value_type(\n            value, d[subkey], subkey, full_key\n        )\n        d[subkey] = value\n\n\ndef _merge_a_into_b(a, b, stack=None):\n    \"\"\"Merge config dictionary a into config dictionary b, clobbering the\n    options in b whenever they are also specified in a.\n    \"\"\"\n    assert isinstance(a, AttrDict), \\\n        '`a` (cur type {}) must be an instance of {}'.format(type(a), AttrDict)\n    assert isinstance(b, AttrDict), \\\n        '`b` (cur type {}) must be an instance of {}'.format(type(b), AttrDict)\n\n    for k, v_ in a.items():\n        full_key = '.'.join(stack) + '.' 
+ k if stack is not None else k\n        # a must specify keys that are in b\n        if k not in b:\n            if _key_is_deprecated(full_key):\n                continue\n            elif _key_is_renamed(full_key):\n                _raise_key_rename_error(full_key)\n            else:\n                raise KeyError('Non-existent config key: {}'.format(full_key))\n\n        v = copy.deepcopy(v_)\n        v = _decode_cfg_value(v)\n        v = _check_and_coerce_cfg_value_type(v, b[k], k, full_key)\n\n        # Recursively merge dicts\n        if isinstance(v, AttrDict):\n            try:\n                stack_push = [k] if stack is None else stack + [k]\n                _merge_a_into_b(v, b[k], stack=stack_push)\n            except BaseException:\n                raise\n        else:\n            b[k] = v\n\n\ndef _key_is_deprecated(full_key):\n    if full_key in _DEPRECATED_KEYS:\n        logger.warn(\n            'Deprecated config key (ignoring): {}'.format(full_key)\n        )\n        return True\n    return False\n\n\ndef _key_is_renamed(full_key):\n    return full_key in _RENAMED_KEYS\n\n\ndef _raise_key_rename_error(full_key):\n    new_key = _RENAMED_KEYS[full_key]\n    if isinstance(new_key, tuple):\n        msg = ' Note: ' + new_key[1]\n        new_key = new_key[0]\n    else:\n        msg = ''\n    raise KeyError(\n        'Key {} was renamed to {}; please update your config.{}'.\n        format(full_key, new_key, msg)\n    )\n\n\ndef _decode_cfg_value(v):\n    \"\"\"Decodes a raw config value (e.g., from a yaml config files or command\n    line argument) into a Python object.\n    \"\"\"\n    # Configs parsed from raw yaml will contain dictionary keys that need to be\n    # converted to AttrDict objects\n    if isinstance(v, dict):\n        return AttrDict(v)\n    # All remaining processing is only applied to strings\n    if not isinstance(v, six.string_types):\n        return v\n    # Try to interpret `v` as a:\n    #   string, number, tuple, list, 
dict, boolean, or None\n    try:\n        v = literal_eval(v)\n    # The following two excepts allow v to pass through when it represents a\n    # string.\n    #\n    # Longer explanation:\n    # The type of v is always a string (before calling literal_eval), but\n    # sometimes it *represents* a string and other times a data structure, like\n    # a list. In the case that v represents a string, what we got back from the\n    # yaml parser is 'foo' *without quotes* (so, not '\"foo\"'). literal_eval is\n    # ok with '\"foo\"', but will raise a ValueError if given 'foo'. In other\n    # cases, like paths (v = 'foo/bar' and not v = '\"foo/bar\"'), literal_eval\n    # will raise a SyntaxError.\n    except ValueError:\n        pass\n    except SyntaxError:\n        pass\n    return v\n\n\ndef _check_and_coerce_cfg_value_type(value_a, value_b, key, full_key):\n    \"\"\"Checks that `value_a`, which is intended to replace `value_b` is of the\n    right type. The type is correct if it matches exactly or is one of a few\n    cases in which the type can be easily coerced.\n    \"\"\"\n    # The types must match (with some exceptions)\n    type_b = type(value_b)\n    type_a = type(value_a)\n    if type_a is type_b:\n        return value_a\n\n    # Exceptions: numpy arrays, strings, tuple<->list\n    if isinstance(value_b, np.ndarray):\n        value_a = np.array(value_a, dtype=value_b.dtype)\n    elif isinstance(value_b, six.string_types):\n        value_a = str(value_a)\n    elif isinstance(value_a, tuple) and isinstance(value_b, list):\n        value_a = list(value_a)\n    elif isinstance(value_a, list) and isinstance(value_b, tuple):\n        value_a = tuple(value_a)\n    else:\n        raise ValueError(\n            'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '\n            'key: {}'.format(type_b, type_a, value_b, value_a, full_key)\n        )\n    return value_a\n"
  },
  {
    "path": "detectron/core/rpn_generator.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Functions for RPN proposal generation.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport cv2\nimport datetime\nimport logging\nimport numpy as np\nimport os\n\nfrom caffe2.python import core\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import cfg\nfrom detectron.datasets import task_evaluation\nfrom detectron.datasets.json_dataset import JsonDataset\nfrom detectron.modeling import model_builder\nfrom detectron.utils.io import save_object\nfrom detectron.utils.timer import Timer\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.env as envu\nimport detectron.utils.net as nu\nimport detectron.utils.subprocess as subprocess_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef generate_rpn_on_dataset(\n    weights_file,\n    dataset_name,\n    _proposal_file_ignored,\n    output_dir,\n    multi_gpu=False,\n  
  gpu_id=0\n):\n    \"\"\"Run inference on a dataset.\"\"\"\n    dataset = JsonDataset(dataset_name)\n    test_timer = Timer()\n    test_timer.tic()\n    if multi_gpu:\n        num_images = len(dataset.get_roidb())\n        _boxes, _scores, _ids, rpn_file = multi_gpu_generate_rpn_on_dataset(\n            weights_file, dataset_name, _proposal_file_ignored, num_images,\n            output_dir\n        )\n    else:\n        # Processes entire dataset range by default\n        _boxes, _scores, _ids, rpn_file = generate_rpn_on_range(\n            weights_file,\n            dataset_name,\n            _proposal_file_ignored,\n            output_dir,\n            gpu_id=gpu_id\n        )\n    test_timer.toc()\n    logger.info('Total inference time: {:.3f}s'.format(test_timer.average_time))\n    return evaluate_proposal_file(dataset, rpn_file, output_dir)\n\n\ndef multi_gpu_generate_rpn_on_dataset(\n    weights_file, dataset_name, _proposal_file_ignored, num_images, output_dir\n):\n    \"\"\"Multi-gpu inference on a dataset.\"\"\"\n    # Retrieve the test_net binary path\n    binary_dir = envu.get_runtime_dir()\n    binary_ext = envu.get_py_bin_ext()\n    binary = os.path.join(binary_dir, 'test_net' + binary_ext)\n    assert os.path.exists(binary), 'Binary \\'{}\\' not found'.format(binary)\n\n    # Pass the target dataset via the command line\n    opts = ['TEST.DATASETS', '(\"{}\",)'.format(dataset_name)]\n    opts += ['TEST.WEIGHTS', weights_file]\n\n    # Run inference in parallel in subprocesses\n    outputs = subprocess_utils.process_in_parallel(\n        'rpn_proposals', num_images, binary, output_dir, opts\n    )\n\n    # Collate the results from each subprocess\n    boxes, scores, ids = [], [], []\n    for rpn_data in outputs:\n        boxes += rpn_data['boxes']\n        scores += rpn_data['scores']\n        ids += rpn_data['ids']\n    rpn_file = os.path.join(output_dir, 'rpn_proposals.pkl')\n    cfg_yaml = envu.yaml_dump(cfg)\n    save_object(\n        
dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml), rpn_file\n    )\n    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))\n    return boxes, scores, ids, rpn_file\n\n\ndef generate_rpn_on_range(\n    weights_file,\n    dataset_name,\n    _proposal_file_ignored,\n    output_dir,\n    ind_range=None,\n    gpu_id=0\n):\n    \"\"\"Run inference on all images in a dataset or over an index range of images\n    in a dataset using a single GPU.\n    \"\"\"\n    assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN\n\n    roidb, start_ind, end_ind, total_num_images = get_roidb(\n        dataset_name, ind_range\n    )\n    logger.info(\n        'Output will be saved to: {:s}'.format(os.path.abspath(output_dir))\n    )\n\n    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)\n    nu.initialize_gpu_from_weights_file(\n        model, weights_file, gpu_id=gpu_id,\n    )\n    model_builder.add_inference_inputs(model)\n    workspace.CreateNet(model.net)\n\n    boxes, scores, ids = generate_proposals_on_roidb(\n        model,\n        roidb,\n        start_ind=start_ind,\n        end_ind=end_ind,\n        total_num_images=total_num_images,\n        gpu_id=gpu_id,\n    )\n\n    cfg_yaml = envu.yaml_dump(cfg)\n    if ind_range is not None:\n        rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range)\n    else:\n        rpn_name = 'rpn_proposals.pkl'\n    rpn_file = os.path.join(output_dir, rpn_name)\n    save_object(\n        dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml), rpn_file\n    )\n    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))\n    return boxes, scores, ids, rpn_file\n\n\ndef generate_proposals_on_roidb(\n    model, roidb, start_ind=None, end_ind=None, total_num_images=None,\n    gpu_id=0,\n):\n    \"\"\"Generate RPN proposals on all images in an imdb.\"\"\"\n    _t = Timer()\n    num_images = len(roidb)\n    roidb_boxes = [[] for _ in range(num_images)]\n    roidb_scores = 
[[] for _ in range(num_images)]\n    roidb_ids = [[] for _ in range(num_images)]\n    if start_ind is None:\n        start_ind = 0\n        end_ind = num_images\n        total_num_images = num_images\n    for i in range(num_images):\n        roidb_ids[i] = roidb[i]['id']\n        im = cv2.imread(roidb[i]['image'])\n        with c2_utils.NamedCudaScope(gpu_id):\n            _t.tic()\n            roidb_boxes[i], roidb_scores[i] = im_proposals(model, im)\n            _t.toc()\n        if i % 10 == 0:\n            ave_time = _t.average_time\n            eta_seconds = ave_time * (num_images - i - 1)\n            eta = str(datetime.timedelta(seconds=int(eta_seconds)))\n            logger.info(\n                (\n                    'rpn_generate: range [{:d}, {:d}] of {:d}: '\n                    '{:d}/{:d} {:.3f}s (eta: {})'\n                ).format(\n                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,\n                    start_ind + num_images, ave_time, eta\n                )\n            )\n\n    return roidb_boxes, roidb_scores, roidb_ids\n\n\ndef im_proposals(model, im):\n    \"\"\"Generate RPN proposals on a single image.\"\"\"\n    inputs = {}\n    inputs['data'], im_scale, inputs['im_info'] = \\\n        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)\n    for k, v in inputs.items():\n        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))\n    workspace.RunNet(model.net.Proto().name)\n\n    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:\n        k_max = cfg.FPN.RPN_MAX_LEVEL\n        k_min = cfg.FPN.RPN_MIN_LEVEL\n        rois_names = [\n            core.ScopedName('rpn_rois_fpn' + str(l))\n            for l in range(k_min, k_max + 1)\n        ]\n        score_names = [\n            core.ScopedName('rpn_roi_probs_fpn' + str(l))\n            for l in range(k_min, k_max + 1)\n        ]\n        blobs = workspace.FetchBlobs(rois_names + score_names)\n        # Combine predictions across all 
levels and retain the top scoring\n        boxes = np.concatenate(blobs[:len(rois_names)])\n        scores = np.concatenate(blobs[len(rois_names):]).squeeze()\n        # Discussion: one could do NMS again after combining predictions from\n        # the different FPN levels. Conceptually, it's probably the right thing\n        # to do. For arbitrary reasons, the original FPN RPN implementation did\n        # not do another round of NMS.\n        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]\n        scores = scores[inds]\n        boxes = boxes[inds, :]\n    else:\n        boxes, scores = workspace.FetchBlobs(\n            [core.ScopedName('rpn_rois'),\n             core.ScopedName('rpn_roi_probs')]\n        )\n        scores = scores.squeeze()\n\n    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,\n    # so we remove it since we just want to return boxes\n    # Scale proposals back to the original input image scale\n    boxes = boxes[:, 1:] / im_scale\n    return boxes, scores\n\n\ndef get_roidb(dataset_name, ind_range):\n    \"\"\"Get the roidb for the dataset specified in the global cfg. 
Optionally\n    restrict it to a range of indices if ind_range is a pair of integers.\n    \"\"\"\n    dataset = JsonDataset(dataset_name)\n    roidb = dataset.get_roidb()\n\n    if ind_range is not None:\n        total_num_images = len(roidb)\n        start, end = ind_range\n        roidb = roidb[start:end]\n    else:\n        start = 0\n        end = len(roidb)\n        total_num_images = end\n\n    return roidb, start, end, total_num_images\n\n\ndef evaluate_proposal_file(dataset, proposal_file, output_dir):\n    \"\"\"Evaluate box proposal average recall.\"\"\"\n    roidb = dataset.get_roidb(gt=True, proposal_file=proposal_file)\n    results = task_evaluation.evaluate_box_proposals(dataset, roidb)\n    task_evaluation.log_box_proposal_results(results)\n    recall_file = os.path.join(output_dir, 'rpn_proposal_recall.pkl')\n    save_object(results, recall_file)\n    return results\n"
  },
  {
    "path": "detectron/core/test.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Inference functionality for most Detectron models.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom collections import defaultdict\nimport cv2\nimport logging\nimport numpy as np\n\nfrom caffe2.python import core\nfrom caffe2.python import workspace\nimport pycocotools.mask as mask_util\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.timer import Timer\nimport detectron.core.test_retinanet as test_retinanet\nimport detectron.modeling.FPN as fpn\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.boxes as box_utils\nimport detectron.utils.image as image_utils\nimport detectron.utils.keypoints as keypoint_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef im_detect_all(model, im, box_proposals, timers=None):\n    if timers is None:\n        timers = defaultdict(Timer)\n\n    # Handle RetinaNet testing separately for now\n    if 
cfg.RETINANET.RETINANET_ON:\n        cls_boxes = test_retinanet.im_detect_bbox(model, im, timers)\n        return cls_boxes, None, None\n\n    timers['im_detect_bbox'].tic()\n    if cfg.TEST.BBOX_AUG.ENABLED:\n        scores, boxes, im_scale = im_detect_bbox_aug(model, im, box_proposals)\n    else:\n        scores, boxes, im_scale = im_detect_bbox(\n            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals\n        )\n    timers['im_detect_bbox'].toc()\n\n    # score and boxes are from the whole image after score thresholding and nms\n    # (they are not separated by class)\n    # cls_boxes boxes and scores are separated by class and in the format used\n    # for evaluating results\n    timers['misc_bbox'].tic()\n    scores, boxes, cls_boxes = box_results_with_nms_and_limit(scores, boxes)\n    timers['misc_bbox'].toc()\n\n    if cfg.MODEL.MASK_ON and boxes.shape[0] > 0:\n        timers['im_detect_mask'].tic()\n        if cfg.TEST.MASK_AUG.ENABLED:\n            masks = im_detect_mask_aug(model, im, boxes)\n        else:\n            masks = im_detect_mask(model, im_scale, boxes)\n        timers['im_detect_mask'].toc()\n\n        timers['misc_mask'].tic()\n        cls_segms = segm_results(\n            cls_boxes, masks, boxes, im.shape[0], im.shape[1]\n        )\n        timers['misc_mask'].toc()\n    else:\n        cls_segms = None\n\n    if cfg.MODEL.KEYPOINTS_ON and boxes.shape[0] > 0:\n        timers['im_detect_keypoints'].tic()\n        if cfg.TEST.KPS_AUG.ENABLED:\n            heatmaps = im_detect_keypoints_aug(model, im, boxes)\n        else:\n            heatmaps = im_detect_keypoints(model, im_scale, boxes)\n        timers['im_detect_keypoints'].toc()\n\n        timers['misc_keypoints'].tic()\n        cls_keyps = keypoint_results(cls_boxes, heatmaps, boxes)\n        timers['misc_keypoints'].toc()\n    else:\n        cls_keyps = None\n\n    return cls_boxes, cls_segms, cls_keyps\n\n\ndef im_conv_body_only(model, im, target_scale, 
target_max_size):\n    \"\"\"Runs `model.conv_body_net` on the given image `im`.\"\"\"\n    im_blob, im_scale, _im_info = blob_utils.get_image_blob(\n        im, target_scale, target_max_size\n    )\n    workspace.FeedBlob(core.ScopedName('data'), im_blob)\n    workspace.RunNet(model.conv_body_net.Proto().name)\n    return im_scale\n\n\ndef im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):\n    \"\"\"Bounding box object detection for an image with given box proposals.\n\n    Arguments:\n        model (DetectionModelHelper): the detection model to use\n        im (ndarray): color image to test (in BGR order)\n        boxes (ndarray): R x 4 array of object proposals in 0-indexed\n            [x1, y1, x2, y2] format, or None if using RPN\n\n    Returns:\n        scores (ndarray): R x K array of object class scores for K classes\n            (K includes background as object category 0)\n        boxes (ndarray): R x 4*K array of predicted bounding boxes\n        im_scale: image scale used in the input blob (as\n            returned by _get_blobs and for use with im_detect_mask, etc.)\n    \"\"\"\n    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)\n\n    # When mapping from image ROIs to feature map ROIs, there's some aliasing\n    # (some distinct image ROIs get mapped to the same feature ROI).\n    # Here, we identify duplicate feature ROIs, so we only compute features\n    # on the unique subset.\n    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:\n        v = np.array([1, 1e3, 1e6, 1e9, 1e12])\n        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)\n        _, index, inv_index = np.unique(\n            hashes, return_index=True, return_inverse=True\n        )\n        inputs['rois'] = inputs['rois'][index, :]\n        boxes = boxes[index, :]\n\n    # Add multi-level rois for FPN\n    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:\n        _add_multilevel_rois_for_test(inputs, 
'rois')\n\n    for k, v in inputs.items():\n        workspace.FeedBlob(core.ScopedName(k), v)\n    workspace.RunNet(model.net.Proto().name)\n\n    # Read out blobs\n    if cfg.MODEL.FASTER_RCNN:\n        rois = workspace.FetchBlob(core.ScopedName('rois'))\n        # unscale back to raw image space\n        boxes = rois[:, 1:5] / im_scale\n\n    # Softmax class probabilities\n    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()\n    # In case there is 1 proposal\n    scores = scores.reshape([-1, scores.shape[-1]])\n\n    if cfg.TEST.BBOX_REG:\n        # Apply bounding-box regression deltas\n        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()\n        # In case there is 1 proposal\n        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])\n        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:\n            # Remove predictions for bg class (compat with MSRA code)\n            box_deltas = box_deltas[:, -4:]\n        pred_boxes = box_utils.bbox_transform(\n            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS\n        )\n        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)\n        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:\n            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))\n    else:\n        # Simply repeat the boxes, once for each class\n        pred_boxes = np.tile(boxes, (1, scores.shape[1]))\n\n    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:\n        # Map scores and predictions back to the original set of boxes\n        scores = scores[inv_index, :]\n        pred_boxes = pred_boxes[inv_index, :]\n\n    return scores, pred_boxes, im_scale\n\n\ndef im_detect_bbox_aug(model, im, box_proposals=None):\n    \"\"\"Performs bbox detection with test-time augmentations.\n    Function signature is the same as for im_detect_bbox.\n    \"\"\"\n    assert not cfg.TEST.BBOX_AUG.SCALE_SIZE_DEP, \\\n        'Size dependent scaling not implemented'\n    assert not cfg.TEST.BBOX_AUG.SCORE_HEUR 
== 'UNION' or \\\n        cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION', \\\n        'Coord heuristic must be union whenever score heuristic is union'\n    assert not cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION' or \\\n        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \\\n        'Score heuristic must be union whenever coord heuristic is union'\n    assert not cfg.MODEL.FASTER_RCNN or \\\n        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \\\n        'Union heuristic must be used to combine Faster RCNN predictions'\n\n    # Collect detections computed under different transformations\n    scores_ts = []\n    boxes_ts = []\n\n    def add_preds_t(scores_t, boxes_t):\n        scores_ts.append(scores_t)\n        boxes_ts.append(boxes_t)\n\n    # Perform detection on the horizontally flipped image\n    if cfg.TEST.BBOX_AUG.H_FLIP:\n        scores_hf, boxes_hf, _ = im_detect_bbox_hflip(\n            model,\n            im,\n            cfg.TEST.SCALE,\n            cfg.TEST.MAX_SIZE,\n            box_proposals=box_proposals\n        )\n        add_preds_t(scores_hf, boxes_hf)\n\n    # Compute detections at different scales\n    for scale in cfg.TEST.BBOX_AUG.SCALES:\n        max_size = cfg.TEST.BBOX_AUG.MAX_SIZE\n        scores_scl, boxes_scl = im_detect_bbox_scale(\n            model, im, scale, max_size, box_proposals\n        )\n        add_preds_t(scores_scl, boxes_scl)\n\n        if cfg.TEST.BBOX_AUG.SCALE_H_FLIP:\n            scores_scl_hf, boxes_scl_hf = im_detect_bbox_scale(\n                model, im, scale, max_size, box_proposals, hflip=True\n            )\n            add_preds_t(scores_scl_hf, boxes_scl_hf)\n\n    # Perform detection at different aspect ratios\n    for aspect_ratio in cfg.TEST.BBOX_AUG.ASPECT_RATIOS:\n        scores_ar, boxes_ar = im_detect_bbox_aspect_ratio(\n            model, im, aspect_ratio, box_proposals\n        )\n        add_preds_t(scores_ar, boxes_ar)\n\n        if cfg.TEST.BBOX_AUG.ASPECT_RATIO_H_FLIP:\n            scores_ar_hf, boxes_ar_hf = 
im_detect_bbox_aspect_ratio(\n                model, im, aspect_ratio, box_proposals, hflip=True\n            )\n            add_preds_t(scores_ar_hf, boxes_ar_hf)\n\n    # Compute detections for the original image (identity transform) last to\n    # ensure that the Caffe2 workspace is populated with blobs corresponding\n    # to the original image on return (postcondition of im_detect_bbox)\n    scores_i, boxes_i, im_scale_i = im_detect_bbox(\n        model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals\n    )\n    add_preds_t(scores_i, boxes_i)\n\n    # Combine the predicted scores\n    if cfg.TEST.BBOX_AUG.SCORE_HEUR == 'ID':\n        scores_c = scores_i\n    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'AVG':\n        scores_c = np.mean(scores_ts, axis=0)\n    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION':\n        scores_c = np.vstack(scores_ts)\n    else:\n        raise NotImplementedError(\n            'Score heur {} not supported'.format(cfg.TEST.BBOX_AUG.SCORE_HEUR)\n        )\n\n    # Combine the predicted boxes\n    if cfg.TEST.BBOX_AUG.COORD_HEUR == 'ID':\n        boxes_c = boxes_i\n    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'AVG':\n        boxes_c = np.mean(boxes_ts, axis=0)\n    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION':\n        boxes_c = np.vstack(boxes_ts)\n    else:\n        raise NotImplementedError(\n            'Coord heur {} not supported'.format(cfg.TEST.BBOX_AUG.COORD_HEUR)\n        )\n\n    return scores_c, boxes_c, im_scale_i\n\n\ndef im_detect_bbox_hflip(\n    model, im, target_scale, target_max_size, box_proposals=None\n):\n    \"\"\"Performs bbox detection on the horizontally flipped image.\n    Function signature is the same as for im_detect_bbox.\n    \"\"\"\n    # Compute predictions on the flipped image\n    im_hf = im[:, ::-1, :]\n    im_width = im.shape[1]\n\n    if not cfg.MODEL.FASTER_RCNN:\n        box_proposals_hf = box_utils.flip_boxes(box_proposals, im_width)\n    else:\n        box_proposals_hf = None\n\n    scores_hf, 
boxes_hf, im_scale = im_detect_bbox(\n        model, im_hf, target_scale, target_max_size, boxes=box_proposals_hf\n    )\n\n    # Invert the detections computed on the flipped image\n    boxes_inv = box_utils.flip_boxes(boxes_hf, im_width)\n\n    return scores_hf, boxes_inv, im_scale\n\n\ndef im_detect_bbox_scale(\n    model, im, target_scale, target_max_size, box_proposals=None, hflip=False\n):\n    \"\"\"Computes bbox detections at the given scale.\n    Returns predictions in the original image space.\n    \"\"\"\n    if hflip:\n        scores_scl, boxes_scl, _ = im_detect_bbox_hflip(\n            model, im, target_scale, target_max_size, box_proposals=box_proposals\n        )\n    else:\n        scores_scl, boxes_scl, _ = im_detect_bbox(\n            model, im, target_scale, target_max_size, boxes=box_proposals\n        )\n    return scores_scl, boxes_scl\n\n\ndef im_detect_bbox_aspect_ratio(\n    model, im, aspect_ratio, box_proposals=None, hflip=False\n):\n    \"\"\"Computes bbox detections at the given width-relative aspect ratio.\n    Returns predictions in the original image space.\n    \"\"\"\n    # Compute predictions on the transformed image\n    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)\n\n    if not cfg.MODEL.FASTER_RCNN:\n        box_proposals_ar = box_utils.aspect_ratio(box_proposals, aspect_ratio)\n    else:\n        box_proposals_ar = None\n\n    if hflip:\n        scores_ar, boxes_ar, _ = im_detect_bbox_hflip(\n            model,\n            im_ar,\n            cfg.TEST.SCALE,\n            cfg.TEST.MAX_SIZE,\n            box_proposals=box_proposals_ar\n        )\n    else:\n        scores_ar, boxes_ar, _ = im_detect_bbox(\n            model,\n            im_ar,\n            cfg.TEST.SCALE,\n            cfg.TEST.MAX_SIZE,\n            boxes=box_proposals_ar\n        )\n\n    # Invert the detected boxes\n    boxes_inv = box_utils.aspect_ratio(boxes_ar, 1.0 / aspect_ratio)\n\n    return scores_ar, boxes_inv\n\n\ndef 
im_detect_mask(model, im_scale, boxes):\n    \"\"\"Infer instance segmentation masks. This function must be called after\n    im_detect_bbox as it assumes that the Caffe2 workspace is already populated\n    with the necessary blobs.\n\n    Arguments:\n        model (DetectionModelHelper): the detection model to use\n        im_scale: image blob scale as returned by im_detect_bbox\n        boxes (ndarray): R x 4 array of bounding box detections (e.g., as\n            returned by im_detect_bbox)\n\n    Returns:\n        pred_masks (ndarray): R x K x M x M array of class specific soft masks\n            output by the network (must be processed by segm_results to convert\n            into hard masks in the original image coordinate space)\n    \"\"\"\n    M = cfg.MRCNN.RESOLUTION\n    if boxes.shape[0] == 0:\n        pred_masks = np.zeros((0, M, M), np.float32)\n        return pred_masks\n\n    inputs = {'mask_rois': _get_rois_blob(boxes, im_scale)}\n    # Add multi-level rois for FPN\n    if cfg.FPN.MULTILEVEL_ROIS:\n        _add_multilevel_rois_for_test(inputs, 'mask_rois')\n\n    for k, v in inputs.items():\n        workspace.FeedBlob(core.ScopedName(k), v)\n    workspace.RunNet(model.mask_net.Proto().name)\n\n    # Fetch masks\n    pred_masks = workspace.FetchBlob(\n        core.ScopedName('mask_fcn_probs')\n    ).squeeze()\n\n    if cfg.MRCNN.CLS_SPECIFIC_MASK:\n        pred_masks = pred_masks.reshape([-1, cfg.MODEL.NUM_CLASSES, M, M])\n    else:\n        pred_masks = pred_masks.reshape([-1, 1, M, M])\n\n    return pred_masks\n\n\ndef im_detect_mask_aug(model, im, boxes):\n    \"\"\"Performs mask detection with test-time augmentations.\n\n    Arguments:\n        model (DetectionModelHelper): the detection model to use\n        im (ndarray): BGR image to test\n        boxes (ndarray): R x 4 array of bounding boxes\n\n    Returns:\n        masks (ndarray): R x K x M x M array of class specific soft masks\n    \"\"\"\n    assert not 
cfg.TEST.MASK_AUG.SCALE_SIZE_DEP, \\\n        'Size dependent scaling not implemented'\n\n    # Collect masks computed under different transformations\n    masks_ts = []\n\n    # Compute masks for the original image (identity transform)\n    im_scale_i = im_conv_body_only(model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)\n    masks_i = im_detect_mask(model, im_scale_i, boxes)\n    masks_ts.append(masks_i)\n\n    # Perform mask detection on the horizontally flipped image\n    if cfg.TEST.MASK_AUG.H_FLIP:\n        masks_hf = im_detect_mask_hflip(\n            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes\n        )\n        masks_ts.append(masks_hf)\n\n    # Compute detections at different scales\n    for scale in cfg.TEST.MASK_AUG.SCALES:\n        max_size = cfg.TEST.MASK_AUG.MAX_SIZE\n        masks_scl = im_detect_mask_scale(model, im, scale, max_size, boxes)\n        masks_ts.append(masks_scl)\n\n        if cfg.TEST.MASK_AUG.SCALE_H_FLIP:\n            masks_scl_hf = im_detect_mask_scale(\n                model, im, scale, max_size, boxes, hflip=True\n            )\n            masks_ts.append(masks_scl_hf)\n\n    # Compute masks at different aspect ratios\n    for aspect_ratio in cfg.TEST.MASK_AUG.ASPECT_RATIOS:\n        masks_ar = im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes)\n        masks_ts.append(masks_ar)\n\n        if cfg.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP:\n            masks_ar_hf = im_detect_mask_aspect_ratio(\n                model, im, aspect_ratio, boxes, hflip=True\n            )\n            masks_ts.append(masks_ar_hf)\n\n    # Combine the predicted soft masks\n    if cfg.TEST.MASK_AUG.HEUR == 'SOFT_AVG':\n        masks_c = np.mean(masks_ts, axis=0)\n    elif cfg.TEST.MASK_AUG.HEUR == 'SOFT_MAX':\n        masks_c = np.amax(masks_ts, axis=0)\n    elif cfg.TEST.MASK_AUG.HEUR == 'LOGIT_AVG':\n\n        def logit(y):\n            return -1.0 * np.log((1.0 - y) / np.maximum(y, 1e-20))\n\n        logit_masks = [logit(y) for y in masks_ts]\n 
       logit_masks = np.mean(logit_masks, axis=0)\n        masks_c = 1.0 / (1.0 + np.exp(-logit_masks))\n    else:\n        raise NotImplementedError(\n            'Heuristic {} not supported'.format(cfg.TEST.MASK_AUG.HEUR)\n        )\n\n    return masks_c\n\n\ndef im_detect_mask_hflip(model, im, target_scale, target_max_size, boxes):\n    \"\"\"Performs mask detection on the horizontally flipped image.\n    Function signature is the same as for im_detect_mask_aug.\n    \"\"\"\n    # Compute the masks for the flipped image\n    im_hf = im[:, ::-1, :]\n    boxes_hf = box_utils.flip_boxes(boxes, im.shape[1])\n\n    im_scale = im_conv_body_only(model, im_hf, target_scale, target_max_size)\n    masks_hf = im_detect_mask(model, im_scale, boxes_hf)\n\n    # Invert the predicted soft masks\n    masks_inv = masks_hf[:, :, :, ::-1]\n\n    return masks_inv\n\n\ndef im_detect_mask_scale(\n    model, im, target_scale, target_max_size, boxes, hflip=False\n):\n    \"\"\"Computes masks at the given scale.\"\"\"\n    if hflip:\n        masks_scl = im_detect_mask_hflip(\n            model, im, target_scale, target_max_size, boxes\n        )\n    else:\n        im_scale = im_conv_body_only(model, im, target_scale, target_max_size)\n        masks_scl = im_detect_mask(model, im_scale, boxes)\n    return masks_scl\n\n\ndef im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes, hflip=False):\n    \"\"\"Computes mask detections at the given width-relative aspect ratio.\"\"\"\n\n    # Perform mask detection on the transformed image\n    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)\n    boxes_ar = box_utils.aspect_ratio(boxes, aspect_ratio)\n\n    if hflip:\n        masks_ar = im_detect_mask_hflip(\n            model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes_ar\n        )\n    else:\n        im_scale = im_conv_body_only(\n            model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE\n        )\n        masks_ar = im_detect_mask(model, im_scale, boxes_ar)\n\n    return 
masks_ar\n\n\ndef im_detect_keypoints(model, im_scale, boxes):\n    \"\"\"Infer instance keypoint poses. This function must be called after\n    im_detect_bbox as it assumes that the Caffe2 workspace is already populated\n    with the necessary blobs.\n\n    Arguments:\n        model (DetectionModelHelper): the detection model to use\n        im_scale: image blob scale as returned by im_detect_bbox\n        boxes (ndarray): R x 4 array of bounding box detections (e.g., as\n            returned by im_detect_bbox)\n\n    Returns:\n        pred_heatmaps (ndarray): R x J x M x M array of keypoint location\n            logits (softmax inputs) for each of the J keypoint types output\n            by the network (must be processed by keypoint_results to convert\n            into point predictions in the original image coordinate space)\n    \"\"\"\n    M = cfg.KRCNN.HEATMAP_SIZE\n    if boxes.shape[0] == 0:\n        pred_heatmaps = np.zeros((0, cfg.KRCNN.NUM_KEYPOINTS, M, M), np.float32)\n        return pred_heatmaps\n\n    inputs = {'keypoint_rois': _get_rois_blob(boxes, im_scale)}\n\n    # Add multi-level rois for FPN\n    if cfg.FPN.MULTILEVEL_ROIS:\n        _add_multilevel_rois_for_test(inputs, 'keypoint_rois')\n\n    for k, v in inputs.items():\n        workspace.FeedBlob(core.ScopedName(k), v)\n    workspace.RunNet(model.keypoint_net.Proto().name)\n\n    pred_heatmaps = workspace.FetchBlob(core.ScopedName('kps_score')).squeeze()\n\n    # In case of 1\n    if pred_heatmaps.ndim == 3:\n        pred_heatmaps = np.expand_dims(pred_heatmaps, axis=0)\n\n    return pred_heatmaps\n\n\ndef im_detect_keypoints_aug(model, im, boxes):\n    \"\"\"Computes keypoint predictions with test-time augmentations.\n\n    Arguments:\n        model (DetectionModelHelper): the detection model to use\n        im (ndarray): BGR image to test\n        boxes (ndarray): R x 4 array of bounding boxes\n\n    Returns:\n        heatmaps (ndarray): R x J x M x M array of keypoint location 
logits\n    \"\"\"\n\n    # Collect heatmaps predicted under different transformations\n    heatmaps_ts = []\n    # Tag predictions computed under downscaling and upscaling transformations\n    ds_ts = []\n    us_ts = []\n\n    def add_heatmaps_t(heatmaps_t, ds_t=False, us_t=False):\n        heatmaps_ts.append(heatmaps_t)\n        ds_ts.append(ds_t)\n        us_ts.append(us_t)\n\n    # Compute the heatmaps for the original image (identity transform)\n    im_scale = im_conv_body_only(model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)\n    heatmaps_i = im_detect_keypoints(model, im_scale, boxes)\n    add_heatmaps_t(heatmaps_i)\n\n    # Perform keypoints detection on the horizontally flipped image\n    if cfg.TEST.KPS_AUG.H_FLIP:\n        heatmaps_hf = im_detect_keypoints_hflip(\n            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes\n        )\n        add_heatmaps_t(heatmaps_hf)\n\n    # Compute detections at different scales\n    for scale in cfg.TEST.KPS_AUG.SCALES:\n        ds_scl = scale < cfg.TEST.SCALE\n        us_scl = scale > cfg.TEST.SCALE\n        heatmaps_scl = im_detect_keypoints_scale(\n            model, im, scale, cfg.TEST.KPS_AUG.MAX_SIZE, boxes\n        )\n        add_heatmaps_t(heatmaps_scl, ds_scl, us_scl)\n\n        if cfg.TEST.KPS_AUG.SCALE_H_FLIP:\n            heatmaps_scl_hf = im_detect_keypoints_scale(\n                model, im, scale, cfg.TEST.KPS_AUG.MAX_SIZE, boxes, hflip=True\n            )\n            add_heatmaps_t(heatmaps_scl_hf, ds_scl, us_scl)\n\n    # Compute keypoints at different aspect ratios\n    for aspect_ratio in cfg.TEST.KPS_AUG.ASPECT_RATIOS:\n        heatmaps_ar = im_detect_keypoints_aspect_ratio(\n            model, im, aspect_ratio, boxes\n        )\n        add_heatmaps_t(heatmaps_ar)\n\n        if cfg.TEST.KPS_AUG.ASPECT_RATIO_H_FLIP:\n            heatmaps_ar_hf = im_detect_keypoints_aspect_ratio(\n                model, im, aspect_ratio, boxes, hflip=True\n            )\n            
add_heatmaps_t(heatmaps_ar_hf)\n\n    # Select the heuristic function for combining the heatmaps\n    if cfg.TEST.KPS_AUG.HEUR == 'HM_AVG':\n        np_f = np.mean\n    elif cfg.TEST.KPS_AUG.HEUR == 'HM_MAX':\n        np_f = np.amax\n    else:\n        raise NotImplementedError(\n            'Heuristic {} not supported'.format(cfg.TEST.KPS_AUG.HEUR)\n        )\n\n    def heur_f(hms_ts):\n        return np_f(hms_ts, axis=0)\n\n    # Combine the heatmaps\n    if cfg.TEST.KPS_AUG.SCALE_SIZE_DEP:\n        heatmaps_c = combine_heatmaps_size_dep(\n            heatmaps_ts, ds_ts, us_ts, boxes, heur_f\n        )\n    else:\n        heatmaps_c = heur_f(heatmaps_ts)\n\n    return heatmaps_c\n\n\ndef im_detect_keypoints_hflip(model, im, target_scale, target_max_size, boxes):\n    \"\"\"Computes keypoint predictions on the horizontally flipped image.\n    Function signature is the same as for im_detect_keypoints_aug.\n    \"\"\"\n    # Compute keypoints for the flipped image\n    im_hf = im[:, ::-1, :]\n    boxes_hf = box_utils.flip_boxes(boxes, im.shape[1])\n\n    im_scale = im_conv_body_only(model, im_hf, target_scale, target_max_size)\n    heatmaps_hf = im_detect_keypoints(model, im_scale, boxes_hf)\n\n    # Invert the predicted keypoints\n    heatmaps_inv = keypoint_utils.flip_heatmaps(heatmaps_hf)\n\n    return heatmaps_inv\n\n\ndef im_detect_keypoints_scale(\n    model, im, target_scale, target_max_size, boxes, hflip=False\n):\n    \"\"\"Computes keypoint predictions at the given scale.\"\"\"\n    if hflip:\n        heatmaps_scl = im_detect_keypoints_hflip(\n            model, im, target_scale, target_max_size, boxes\n        )\n    else:\n        im_scale = im_conv_body_only(model, im, target_scale, target_max_size)\n        heatmaps_scl = im_detect_keypoints(model, im_scale, boxes)\n    return heatmaps_scl\n\n\ndef im_detect_keypoints_aspect_ratio(\n    model, im, aspect_ratio, boxes, hflip=False\n):\n    \"\"\"Detects keypoints at the given width-relative aspect 
ratio.\"\"\"\n\n    # Perform keypoint detection on the transformed image\n    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)\n    boxes_ar = box_utils.aspect_ratio(boxes, aspect_ratio)\n\n    if hflip:\n        heatmaps_ar = im_detect_keypoints_hflip(\n            model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes_ar\n        )\n    else:\n        im_scale = im_conv_body_only(\n            model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE\n        )\n        heatmaps_ar = im_detect_keypoints(model, im_scale, boxes_ar)\n\n    return heatmaps_ar\n\n\ndef combine_heatmaps_size_dep(hms_ts, ds_ts, us_ts, boxes, heur_f):\n    \"\"\"Combines heatmaps while taking object sizes into account.\"\"\"\n    assert len(hms_ts) == len(ds_ts) and len(ds_ts) == len(us_ts), \\\n        'All sets of hms must be tagged with downscaling and upscaling flags'\n\n    # Classify objects into small+medium and large based on their box areas\n    areas = box_utils.boxes_area(boxes)\n    sm_objs = areas < cfg.TEST.KPS_AUG.AREA_TH\n    l_objs = areas >= cfg.TEST.KPS_AUG.AREA_TH\n\n    # Combine heatmaps computed under different transformations for each object\n    hms_c = np.zeros_like(hms_ts[0])\n\n    for i in range(hms_c.shape[0]):\n        hms_to_combine = []\n        for hms_t, ds_t, us_t in zip(hms_ts, ds_ts, us_ts):\n            # Discard downscaling predictions for small and medium objects\n            if sm_objs[i] and ds_t:\n                continue\n            # Discard upscaling predictions for large objects\n            if l_objs[i] and us_t:\n                continue\n            hms_to_combine.append(hms_t[i])\n        hms_c[i] = heur_f(hms_to_combine)\n\n    return hms_c\n\n\ndef box_results_with_nms_and_limit(scores, boxes):\n    \"\"\"Returns bounding-box detection results by thresholding on scores and\n    applying non-maximum suppression (NMS).\n\n    `boxes` has shape (#detections, 4 * #classes), where each row represents\n    a list of predicted bounding boxes 
for each of the object classes in the\n    dataset (including the background class). The detections in each row\n    originate from the same object proposal.\n\n    `scores` has shape (#detection, #classes), where each row represents a list\n    of object detection confidence scores for each of the object classes in the\n    dataset (including the background class). `scores[i, j]` corresponds to the\n    box at `boxes[i, j * 4:(j + 1) * 4]`.\n    \"\"\"\n    num_classes = cfg.MODEL.NUM_CLASSES\n    cls_boxes = [[] for _ in range(num_classes)]\n    # Apply threshold on detection probabilities and apply NMS\n    # Skip j = 0, because it's the background class\n    for j in range(1, num_classes):\n        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]\n        scores_j = scores[inds, j]\n        boxes_j = boxes[inds, j * 4:(j + 1) * 4]\n        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(\n            np.float32, copy=False\n        )\n        if cfg.TEST.SOFT_NMS.ENABLED:\n            nms_dets, _ = box_utils.soft_nms(\n                dets_j,\n                sigma=cfg.TEST.SOFT_NMS.SIGMA,\n                overlap_thresh=cfg.TEST.NMS,\n                score_thresh=0.0001,\n                method=cfg.TEST.SOFT_NMS.METHOD\n            )\n        else:\n            keep = box_utils.nms(dets_j, cfg.TEST.NMS)\n            nms_dets = dets_j[keep, :]\n        # Refine the post-NMS boxes using bounding-box voting\n        if cfg.TEST.BBOX_VOTE.ENABLED:\n            nms_dets = box_utils.box_voting(\n                nms_dets,\n                dets_j,\n                cfg.TEST.BBOX_VOTE.VOTE_TH,\n                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD\n            )\n        cls_boxes[j] = nms_dets\n\n    # Limit to max_per_image detections **over all classes**\n    if cfg.TEST.DETECTIONS_PER_IM > 0:\n        image_scores = np.hstack(\n            [cls_boxes[j][:, -1] for j in range(1, num_classes)]\n        )\n        if len(image_scores) 
> cfg.TEST.DETECTIONS_PER_IM:\n            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]\n            for j in range(1, num_classes):\n                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]\n                cls_boxes[j] = cls_boxes[j][keep, :]\n\n    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])\n    boxes = im_results[:, :-1]\n    scores = im_results[:, -1]\n    return scores, boxes, cls_boxes\n\n\ndef segm_results(cls_boxes, masks, ref_boxes, im_h, im_w):\n    num_classes = cfg.MODEL.NUM_CLASSES\n    cls_segms = [[] for _ in range(num_classes)]\n    mask_ind = 0\n    # To work around an issue with cv2.resize (it seems to automatically pad\n    # with repeated border values), we manually zero-pad the masks by 1 pixel\n    # prior to resizing back to the original image resolution. This prevents\n    # \"top hat\" artifacts. We therefore need to expand the reference boxes by an\n    # appropriate factor.\n    M = cfg.MRCNN.RESOLUTION\n    scale = (M + 2.0) / M\n    ref_boxes = box_utils.expand_boxes(ref_boxes, scale)\n    ref_boxes = ref_boxes.astype(np.int32)\n    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)\n\n    # skip j = 0, because it's the background class\n    for j in range(1, num_classes):\n        segms = []\n        for _ in range(cls_boxes[j].shape[0]):\n            if cfg.MRCNN.CLS_SPECIFIC_MASK:\n                padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :]\n            else:\n                padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :]\n\n            ref_box = ref_boxes[mask_ind, :]\n            w = ref_box[2] - ref_box[0] + 1\n            h = ref_box[3] - ref_box[1] + 1\n            w = np.maximum(w, 1)\n            h = np.maximum(h, 1)\n\n            mask = cv2.resize(padded_mask, (w, h))\n            mask = np.array(mask > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8)\n            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)\n\n            x_0 = max(ref_box[0], 
0)\n            x_1 = min(ref_box[2] + 1, im_w)\n            y_0 = max(ref_box[1], 0)\n            y_1 = min(ref_box[3] + 1, im_h)\n\n            im_mask[y_0:y_1, x_0:x_1] = mask[\n                (y_0 - ref_box[1]):(y_1 - ref_box[1]),\n                (x_0 - ref_box[0]):(x_1 - ref_box[0])\n            ]\n\n            # Get RLE encoding used by the COCO evaluation API\n            rle = mask_util.encode(\n                np.array(im_mask[:, :, np.newaxis], order='F')\n            )[0]\n            segms.append(rle)\n\n            mask_ind += 1\n\n        cls_segms[j] = segms\n\n    assert mask_ind == masks.shape[0]\n    return cls_segms\n\n\ndef keypoint_results(cls_boxes, pred_heatmaps, ref_boxes):\n    num_classes = cfg.MODEL.NUM_CLASSES\n    cls_keyps = [[] for _ in range(num_classes)]\n    person_idx = keypoint_utils.get_person_class_index()\n    xy_preds = keypoint_utils.heatmaps_to_keypoints(pred_heatmaps, ref_boxes)\n\n    # NMS OKS\n    if cfg.KRCNN.NMS_OKS:\n        keep = keypoint_utils.nms_oks(xy_preds, ref_boxes, 0.3)\n        xy_preds = xy_preds[keep, :, :]\n        ref_boxes = ref_boxes[keep, :]\n        pred_heatmaps = pred_heatmaps[keep, :, :, :]\n        cls_boxes[person_idx] = cls_boxes[person_idx][keep, :]\n\n    kps = [xy_preds[i] for i in range(xy_preds.shape[0])]\n    cls_keyps[person_idx] = kps\n    return cls_keyps\n\n\ndef _get_rois_blob(im_rois, im_scale):\n    \"\"\"Converts RoIs into network inputs.\n\n    Arguments:\n        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates\n        im_scale: scale factor as returned by blob_utils.get_image_blob\n\n    Returns:\n        blob (ndarray): R x 5 matrix of RoIs in the image pyramid with columns\n            [level, x1, y1, x2, y2]\n    \"\"\"\n    rois, levels = _project_im_rois(im_rois, im_scale)\n    rois_blob = np.hstack((levels, rois))\n    return rois_blob.astype(np.float32, copy=False)\n\n\ndef _project_im_rois(im_rois, scales):\n    \"\"\"Project image RoIs 
into the image pyramid built by _get_image_blob.\n\n    Arguments:\n        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates\n        scales (list): scale factors as returned by _get_image_blob\n\n    Returns:\n        rois (ndarray): R x 4 matrix of projected RoI coordinates\n        levels (ndarray): image pyramid levels used by each projected RoI\n    \"\"\"\n    rois = im_rois.astype(float, copy=False) * scales\n    levels = np.zeros((im_rois.shape[0], 1), dtype=int)\n    return rois, levels\n\n\ndef _add_multilevel_rois_for_test(blobs, name):\n    \"\"\"Distributes a set of RoIs across FPN pyramid levels by creating new level\n    specific RoI blobs.\n\n    Arguments:\n        blobs (dict): dictionary of blobs\n        name (str): a key in 'blobs' identifying the source RoI blob\n\n    Returns:\n        [by ref] blobs (dict): new keys named by `name + 'fpn' + level`\n            are added to dict each with a value that's an R_level x 5 ndarray of\n            RoIs (see _get_rois_blob for format)\n    \"\"\"\n    lvl_min = cfg.FPN.ROI_MIN_LEVEL\n    lvl_max = cfg.FPN.ROI_MAX_LEVEL\n    lvls = fpn.map_rois_to_fpn_levels(blobs[name][:, 1:5], lvl_min, lvl_max)\n    fpn.add_multilevel_roi_blobs(\n        blobs, name, blobs[name], lvls, lvl_min, lvl_max\n    )\n\n\ndef _get_blobs(im, rois, target_scale, target_max_size):\n    \"\"\"Convert an image and RoIs within that image into network inputs.\"\"\"\n    blobs = {}\n    blobs['data'], im_scale, blobs['im_info'] = \\\n        blob_utils.get_image_blob(im, target_scale, target_max_size)\n    if rois is not None:\n        blobs['rois'] = _get_rois_blob(rois, im_scale)\n    return blobs, im_scale\n"
  },
  {
    "path": "detectron/core/test_engine.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Test a Detectron network on an imdb (image database).\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom collections import defaultdict\nimport cv2\nimport datetime\nimport logging\nimport numpy as np\nimport os\n\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import cfg\nfrom detectron.core.config import get_output_dir\nfrom detectron.core.rpn_generator import generate_rpn_on_dataset\nfrom detectron.core.rpn_generator import generate_rpn_on_range\nfrom detectron.core.test import im_detect_all\nfrom detectron.datasets import task_evaluation\nfrom detectron.datasets.json_dataset import JsonDataset\nfrom detectron.modeling import model_builder\nfrom detectron.utils.io import save_object\nfrom detectron.utils.timer import Timer\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.env as envu\nimport detectron.utils.net as net_utils\nimport detectron.utils.subprocess as subprocess_utils\nimport detectron.utils.vis as vis_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef get_eval_functions():\n    # Determine which parent or child function should handle inference\n    if cfg.MODEL.RPN_ONLY:\n        child_func = 
generate_rpn_on_range\n        parent_func = generate_rpn_on_dataset\n    else:\n        # Generic case that handles all network types other than RPN-only nets\n        # and RetinaNet\n        child_func = test_net\n        parent_func = test_net_on_dataset\n\n    return parent_func, child_func\n\n\ndef get_inference_dataset(index, is_parent=True):\n    assert is_parent or len(cfg.TEST.DATASETS) == 1, \\\n        'The child inference process can only work on a single dataset'\n\n    dataset_name = cfg.TEST.DATASETS[index]\n\n    if cfg.TEST.PRECOMPUTED_PROPOSALS:\n        assert is_parent or len(cfg.TEST.PROPOSAL_FILES) == 1, \\\n            'The child inference process can only work on a single proposal file'\n        assert len(cfg.TEST.PROPOSAL_FILES) == len(cfg.TEST.DATASETS), \\\n            'If proposals are used, one proposal file must be specified for ' \\\n            'each dataset'\n        proposal_file = cfg.TEST.PROPOSAL_FILES[index]\n    else:\n        proposal_file = None\n\n    return dataset_name, proposal_file\n\n\ndef run_inference(\n    weights_file, ind_range=None,\n    multi_gpu_testing=False, gpu_id=0,\n    check_expected_results=False,\n):\n    parent_func, child_func = get_eval_functions()\n    is_parent = ind_range is None\n\n    def result_getter():\n        if is_parent:\n            # Parent case:\n            # In this case we're either running inference on the entire dataset in a\n            # single process or (if multi_gpu_testing is True) using this process to\n            # launch subprocesses that each run inference on a range of the dataset\n            all_results = {}\n            for i in range(len(cfg.TEST.DATASETS)):\n                dataset_name, proposal_file = get_inference_dataset(i)\n                output_dir = get_output_dir(dataset_name, training=False)\n                results = parent_func(\n                    weights_file,\n                    dataset_name,\n                    proposal_file,\n                 
   output_dir,\n                    multi_gpu=multi_gpu_testing\n                )\n                all_results.update(results)\n\n            return all_results\n        else:\n            # Subprocess child case:\n            # In this case test_net was called via subprocess.Popen to execute on a\n            # range of inputs on a single dataset\n            dataset_name, proposal_file = get_inference_dataset(0, is_parent=False)\n            output_dir = get_output_dir(dataset_name, training=False)\n            return child_func(\n                weights_file,\n                dataset_name,\n                proposal_file,\n                output_dir,\n                ind_range=ind_range,\n                gpu_id=gpu_id\n            )\n\n    all_results = result_getter()\n    if check_expected_results and is_parent:\n        task_evaluation.check_expected_results(\n            all_results,\n            atol=cfg.EXPECTED_RESULTS_ATOL,\n            rtol=cfg.EXPECTED_RESULTS_RTOL\n        )\n        task_evaluation.log_copy_paste_friendly_results(all_results)\n\n    return all_results\n\n\ndef test_net_on_dataset(\n    weights_file,\n    dataset_name,\n    proposal_file,\n    output_dir,\n    multi_gpu=False,\n    gpu_id=0\n):\n    \"\"\"Run inference on a dataset.\"\"\"\n    dataset = JsonDataset(dataset_name)\n    test_timer = Timer()\n    test_timer.tic()\n    if multi_gpu:\n        num_images = len(dataset.get_roidb())\n        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(\n            weights_file, dataset_name, proposal_file, num_images, output_dir\n        )\n    else:\n        all_boxes, all_segms, all_keyps = test_net(\n            weights_file, dataset_name, proposal_file, output_dir, gpu_id=gpu_id\n        )\n    test_timer.toc()\n    logger.info('Total inference time: {:.3f}s'.format(test_timer.average_time))\n    results = task_evaluation.evaluate_all(\n        dataset, all_boxes, all_segms, all_keyps, output_dir\n    )\n    return 
results\n\n\ndef multi_gpu_test_net_on_dataset(\n    weights_file, dataset_name, proposal_file, num_images, output_dir\n):\n    \"\"\"Multi-gpu inference on a dataset.\"\"\"\n    binary_dir = envu.get_runtime_dir()\n    binary_ext = envu.get_py_bin_ext()\n    binary = os.path.join(binary_dir, 'test_net' + binary_ext)\n    assert os.path.exists(binary), 'Binary \\'{}\\' not found'.format(binary)\n\n    # Pass the target dataset and proposal file (if any) via the command line\n    opts = ['TEST.DATASETS', '(\"{}\",)'.format(dataset_name)]\n    opts += ['TEST.WEIGHTS', weights_file]\n    if proposal_file:\n        opts += ['TEST.PROPOSAL_FILES', '(\"{}\",)'.format(proposal_file)]\n\n    # Run inference in parallel in subprocesses\n    # Outputs will be a list of outputs from each subprocess, where the output\n    # of each subprocess is the dictionary saved by test_net().\n    outputs = subprocess_utils.process_in_parallel(\n        'detection', num_images, binary, output_dir, opts\n    )\n\n    # Collate the results from each subprocess\n    all_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]\n    all_segms = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]\n    all_keyps = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]\n    for det_data in outputs:\n        all_boxes_batch = det_data['all_boxes']\n        all_segms_batch = det_data['all_segms']\n        all_keyps_batch = det_data['all_keyps']\n        for cls_idx in range(1, cfg.MODEL.NUM_CLASSES):\n            all_boxes[cls_idx] += all_boxes_batch[cls_idx]\n            all_segms[cls_idx] += all_segms_batch[cls_idx]\n            all_keyps[cls_idx] += all_keyps_batch[cls_idx]\n    det_file = os.path.join(output_dir, 'detections.pkl')\n    cfg_yaml = envu.yaml_dump(cfg)\n    save_object(\n        dict(\n            all_boxes=all_boxes,\n            all_segms=all_segms,\n            all_keyps=all_keyps,\n            cfg=cfg_yaml\n        ), det_file\n    )\n    logger.info('Wrote detections to: 
{}'.format(os.path.abspath(det_file)))\n\n    return all_boxes, all_segms, all_keyps\n\n\ndef test_net(\n    weights_file,\n    dataset_name,\n    proposal_file,\n    output_dir,\n    ind_range=None,\n    gpu_id=0\n):\n    \"\"\"Run inference on all images in a dataset or over an index range of images\n    in a dataset using a single GPU.\n    \"\"\"\n    assert not cfg.MODEL.RPN_ONLY, \\\n        'Use rpn_generate to generate proposals from RPN-only models'\n\n    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(\n        dataset_name, proposal_file, ind_range\n    )\n    model = initialize_model_from_cfg(weights_file, gpu_id=gpu_id)\n    num_images = len(roidb)\n    num_classes = cfg.MODEL.NUM_CLASSES\n    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)\n    timers = defaultdict(Timer)\n    for i, entry in enumerate(roidb):\n        if cfg.TEST.PRECOMPUTED_PROPOSALS:\n            # The roidb may contain ground-truth rois (for example, if the roidb\n            # comes from the training or val split). We only want to evaluate\n            # detection on the *non*-ground-truth rois. 
We select only the rois\n            # that have the gt_classes field set to 0, which means there's no\n            # ground truth.\n            box_proposals = entry['boxes'][entry['gt_classes'] == 0]\n            if len(box_proposals) == 0:\n                continue\n        else:\n            # Faster R-CNN type models generate proposals on-the-fly with an\n            # in-network RPN; 1-stage models don't require proposals.\n            box_proposals = None\n\n        im = cv2.imread(entry['image'])\n        with c2_utils.NamedCudaScope(gpu_id):\n            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(\n                model, im, box_proposals, timers\n            )\n\n        extend_results(i, all_boxes, cls_boxes_i)\n        if cls_segms_i is not None:\n            extend_results(i, all_segms, cls_segms_i)\n        if cls_keyps_i is not None:\n            extend_results(i, all_keyps, cls_keyps_i)\n\n        if i % 10 == 0:  # Reduce log file size\n            ave_total_time = np.sum([t.average_time for t in timers.values()])\n            eta_seconds = ave_total_time * (num_images - i - 1)\n            eta = str(datetime.timedelta(seconds=int(eta_seconds)))\n            det_time = (\n                timers['im_detect_bbox'].average_time +\n                timers['im_detect_mask'].average_time +\n                timers['im_detect_keypoints'].average_time\n            )\n            misc_time = (\n                timers['misc_bbox'].average_time +\n                timers['misc_mask'].average_time +\n                timers['misc_keypoints'].average_time\n            )\n            logger.info(\n                (\n                    'im_detect: range [{:d}, {:d}] of {:d}: '\n                    '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'\n                ).format(\n                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,\n                    start_ind + num_images, det_time, misc_time, eta\n                )\n            )\n\n       
 if cfg.VIS:\n            im_name = os.path.splitext(os.path.basename(entry['image']))[0]\n            vis_utils.vis_one_image(\n                im[:, :, ::-1],\n                '{:d}_{:s}'.format(i, im_name),\n                os.path.join(output_dir, 'vis'),\n                cls_boxes_i,\n                segms=cls_segms_i,\n                keypoints=cls_keyps_i,\n                thresh=cfg.VIS_TH,\n                box_alpha=0.8,\n                dataset=dataset,\n                show_class=True\n            )\n\n    cfg_yaml = envu.yaml_dump(cfg)\n    if ind_range is not None:\n        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)\n    else:\n        det_name = 'detections.pkl'\n    det_file = os.path.join(output_dir, det_name)\n    save_object(\n        dict(\n            all_boxes=all_boxes,\n            all_segms=all_segms,\n            all_keyps=all_keyps,\n            cfg=cfg_yaml\n        ), det_file\n    )\n    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))\n    return all_boxes, all_segms, all_keyps\n\n\ndef initialize_model_from_cfg(weights_file, gpu_id=0):\n    \"\"\"Initialize a model from the global cfg. Loads test-time weights and\n    creates the networks in the Caffe2 workspace.\n    \"\"\"\n    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)\n    net_utils.initialize_gpu_from_weights_file(\n        model, weights_file, gpu_id=gpu_id,\n    )\n    model_builder.add_inference_inputs(model)\n    workspace.CreateNet(model.net)\n    workspace.CreateNet(model.conv_body_net)\n    if cfg.MODEL.MASK_ON:\n        workspace.CreateNet(model.mask_net)\n    if cfg.MODEL.KEYPOINTS_ON:\n        workspace.CreateNet(model.keypoint_net)\n    return model\n\n\ndef get_roidb_and_dataset(dataset_name, proposal_file, ind_range):\n    \"\"\"Get the roidb for the dataset specified in the global cfg. 
Optionally\n    restrict it to a range of indices if ind_range is a pair of integers.\n    \"\"\"\n    dataset = JsonDataset(dataset_name)\n    if cfg.TEST.PRECOMPUTED_PROPOSALS:\n        assert proposal_file, 'No proposal file given'\n        roidb = dataset.get_roidb(\n            proposal_file=proposal_file,\n            proposal_limit=cfg.TEST.PROPOSAL_LIMIT\n        )\n    else:\n        roidb = dataset.get_roidb()\n\n    if ind_range is not None:\n        total_num_images = len(roidb)\n        start, end = ind_range\n        roidb = roidb[start:end]\n    else:\n        start = 0\n        end = len(roidb)\n        total_num_images = end\n\n    return roidb, dataset, start, end, total_num_images\n\n\ndef empty_results(num_classes, num_images):\n    \"\"\"Return empty results lists for boxes, masks, and keypoints.\n    Box detections are collected into:\n      all_boxes[cls][image] = N x 5 array with columns (x1, y1, x2, y2, score)\n    Instance mask predictions are collected into:\n      all_segms[cls][image] = [...] list of COCO RLE encoded masks that are in\n      1:1 correspondence with the boxes in all_boxes[cls][image]\n    Keypoint predictions are collected into:\n      all_keyps[cls][image] = [...] 
list of keypoints results, each encoded as\n      a 3D array (#rois, 4, #keypoints) with the 4 rows corresponding to\n      [x, y, logit, prob] (See: utils.keypoints.heatmaps_to_keypoints).\n      Keypoints are recorded for person (cls = 1); they are in 1:1\n      correspondence with the boxes in all_boxes[cls][image].\n    \"\"\"\n    # Note: do not be tempted to use [[]] * N, which gives N references to the\n    # *same* empty list.\n    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]\n    all_segms = [[[] for _ in range(num_images)] for _ in range(num_classes)]\n    all_keyps = [[[] for _ in range(num_images)] for _ in range(num_classes)]\n    return all_boxes, all_segms, all_keyps\n\n\ndef extend_results(index, all_res, im_res):\n    \"\"\"Add results for an image to the set of all results at the specified\n    index.\n    \"\"\"\n    # Skip cls_idx 0 (__background__)\n    for cls_idx in range(1, len(im_res)):\n        all_res[cls_idx][index] = im_res[cls_idx]\n"
  },
  {
    "path": "detectron/core/test_retinanet.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Test a RetinaNet network on an image database\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport logging\nfrom collections import defaultdict\n\nfrom caffe2.python import core, workspace\n\nfrom detectron.core.config import cfg\nfrom detectron.modeling.generate_anchors import generate_anchors\nfrom detectron.utils.timer import Timer\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.boxes as box_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef _create_cell_anchors():\n    \"\"\"\n    Generate all types of anchors for all fpn levels/scales/aspect ratios.\n    This function is called only once at the beginning of inference.\n    \"\"\"\n    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL\n    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE\n    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS\n    anchor_scale = cfg.RETINANET.ANCHOR_SCALE\n    A = scales_per_octave * len(aspect_ratios)\n    anchors = {}\n    for lvl in range(k_min, k_max + 1):\n        # create cell anchors array\n        stride = 2. 
** lvl\n        cell_anchors = np.zeros((A, 4))\n        a = 0\n        for octave in range(scales_per_octave):\n            octave_scale = 2 ** (octave / float(scales_per_octave))\n            for aspect in aspect_ratios:\n                anchor_sizes = (stride * octave_scale * anchor_scale, )\n                anchor_aspect_ratios = (aspect, )\n                cell_anchors[a, :] = generate_anchors(\n                    stride=stride, sizes=anchor_sizes,\n                    aspect_ratios=anchor_aspect_ratios)\n                a += 1\n        anchors[lvl] = cell_anchors\n    return anchors\n\n\ndef im_detect_bbox(model, im, timers=None):\n    \"\"\"Generate RetinaNet detections on a single image.\"\"\"\n    if timers is None:\n        timers = defaultdict(Timer)\n    # Although anchors are input independent and could be precomputed,\n    # recomputing them per image only brings a small overhead\n    anchors = _create_cell_anchors()\n    timers['im_detect_bbox'].tic()\n    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL\n    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)\n    inputs = {}\n    inputs['data'], im_scale, inputs['im_info'] = \\\n        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)\n    cls_probs, box_preds = [], []\n    for lvl in range(k_min, k_max + 1):\n        suffix = 'fpn{}'.format(lvl)\n        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))\n        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))\n    for k, v in inputs.items():\n        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))\n\n    workspace.RunNet(model.net.Proto().name)\n    cls_probs = workspace.FetchBlobs(cls_probs)\n    box_preds = workspace.FetchBlobs(box_preds)\n\n    # here the boxes_all are [x0, y0, x1, y1, score]\n    boxes_all = defaultdict(list)\n\n    cnt = 0\n    for lvl in range(k_min, k_max + 1):\n        # create cell anchors array\n        
stride = 2. ** lvl\n        cell_anchors = anchors[lvl]\n\n        # fetch per level probability\n        cls_prob = cls_probs[cnt]\n        box_pred = box_preds[cnt]\n        cls_prob = cls_prob.reshape((\n            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),\n            cls_prob.shape[2], cls_prob.shape[3]))\n        box_pred = box_pred.reshape((\n            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))\n        cnt += 1\n\n        if cfg.RETINANET.SOFTMAX:\n            cls_prob = cls_prob[:, :, 1::, :, :]\n\n        cls_prob_ravel = cls_prob.ravel()\n        # In some cases [especially for very small img sizes], it's possible that\n        # candidate_ind is empty if we impose threshold 0.05 at all levels. This\n        # will lead to errors since no detections are found for this image. Hence,\n        # for lvl 7 which has small spatial resolution, we take the threshold 0.0\n        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0\n        candidate_inds = np.where(cls_prob_ravel > th)[0]\n        if (len(candidate_inds) == 0):\n            continue\n\n        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))\n        inds = np.argpartition(\n            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]\n        inds = candidate_inds[inds]\n\n        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()\n        classes = inds_5d[:, 2]\n        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]\n        scores = cls_prob[:, anchor_ids, classes, y, x]\n\n        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)\n        boxes *= stride\n        boxes += cell_anchors[anchor_ids, :]\n\n        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:\n            box_deltas = box_pred[0, anchor_ids, :, y, x]\n        else:\n            box_cls_inds = classes * 4\n            box_deltas = np.vstack(\n                [box_pred[0, ind:ind + 4, yi, xi]\n                 for 
ind, yi, xi in zip(box_cls_inds, y, x)]\n            )\n        pred_boxes = (\n            box_utils.bbox_transform(boxes, box_deltas)\n            if cfg.TEST.BBOX_REG else boxes)\n        pred_boxes /= im_scale\n        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)\n        box_scores = np.zeros((pred_boxes.shape[0], 5))\n        box_scores[:, 0:4] = pred_boxes\n        box_scores[:, 4] = scores\n\n        for cls in range(1, cfg.MODEL.NUM_CLASSES):\n            inds = np.where(classes == cls - 1)[0]\n            if len(inds) > 0:\n                boxes_all[cls].extend(box_scores[inds, :])\n    timers['im_detect_bbox'].toc()\n\n    # Combine predictions across all levels and retain the top scoring by class\n    timers['misc_bbox'].tic()\n    detections = []\n    for cls, boxes in boxes_all.items():\n        cls_dets = np.vstack(boxes).astype(dtype=np.float32)\n        # do class specific nms here\n        if cfg.TEST.SOFT_NMS.ENABLED:\n            cls_dets, keep = box_utils.soft_nms(\n                cls_dets,\n                sigma=cfg.TEST.SOFT_NMS.SIGMA,\n                overlap_thresh=cfg.TEST.NMS,\n                score_thresh=0.0001,\n                method=cfg.TEST.SOFT_NMS.METHOD\n            )\n        else:\n            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)\n            cls_dets = cls_dets[keep, :]\n        out = np.zeros((len(keep), 6))\n        out[:, 0:5] = cls_dets\n        out[:, 5].fill(cls)\n        detections.append(out)\n\n    # detections (N, 6) format:\n    #   detections[:, :4] - boxes\n    #   detections[:, 4] - scores\n    #   detections[:, 5] - classes\n    detections = np.vstack(detections)\n    # sort all again\n    inds = np.argsort(-detections[:, 4])\n    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]\n\n    # Convert the detections to image cls_ format (see core/test_engine.py)\n    num_classes = cfg.MODEL.NUM_CLASSES\n    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]\n    for c in 
range(1, num_classes):\n        inds = np.where(detections[:, 5] == c)[0]\n        cls_boxes[c] = detections[inds, :5]\n    timers['misc_bbox'].toc()\n\n    return cls_boxes\n"
  },
  {
    "path": "detectron/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m",
    "content": "function VOCopts = get_voc_opts(path)\n\ntmp = pwd;\ncd(path);\ntry\n  addpath('VOCcode');\n  VOCinit;\ncatch\n  rmpath('VOCcode');\n  cd(tmp);\n  error(sprintf('VOCcode directory not found under %s', path));\nend\nrmpath('VOCcode');\ncd(tmp);\n"
  },
  {
    "path": "detectron/datasets/VOCdevkit-matlab-wrapper/voc_eval.m",
    "content": "function res = voc_eval(path, comp_id, test_set, output_dir)\n\nVOCopts = get_voc_opts(path);\nVOCopts.testset = test_set;\n\nfor i = 1:length(VOCopts.classes)\n  cls = VOCopts.classes{i};\n  res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);\nend\n\nfprintf('\\n~~~~~~~~~~~~~~~~~~~~\\n');\nfprintf('Results:\\n');\naps = [res(:).ap]';\nfprintf('%.1f\\n', aps * 100);\nfprintf('%.1f\\n', mean(aps) * 100);\nfprintf('~~~~~~~~~~~~~~~~~~~~\\n');\n\nfunction res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)\n\ntest_set = VOCopts.testset;\nyear = VOCopts.dataset(4:end);\n\naddpath(fullfile(VOCopts.datadir, 'VOCcode'));\n\nres_fn = sprintf(VOCopts.detrespath, comp_id, cls);\n\nrecall = [];\nprec = [];\nap = 0;\nap_auc = 0;\n\ndo_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');\nif do_eval\n  % Bug in VOCevaldet requires that tic has been called first\n  tic;\n  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);\n  ap_auc = xVOCap(recall, prec);\n\n  % force plot limits\n  ylim([0 1]);\n  xlim([0 1]);\n\n  print(gcf, '-djpeg', '-r0', ...\n        [output_dir '/' cls '_pr.jpg']);\nend\nfprintf('!!! %s : %.4f %.4f\\n', cls, ap, ap_auc);\n\nres.recall = recall;\nres.prec = prec;\nres.ap = ap;\nres.ap_auc = ap_auc;\n\nsave([output_dir '/' cls '_pr.mat'], ...\n     'res', 'recall', 'prec', 'ap', 'ap_auc');\n\nrmpath(fullfile(VOCopts.datadir, 'VOCcode'));\n"
  },
  {
    "path": "detectron/datasets/VOCdevkit-matlab-wrapper/xVOCap.m",
    "content": "function ap = xVOCap(rec,prec)\r\n% From the PASCAL VOC 2011 devkit\r\n\r\nmrec=[0 ; rec ; 1];\r\nmpre=[0 ; prec ; 0];\r\nfor i=numel(mpre)-1:-1:1\r\n    mpre(i)=max(mpre(i),mpre(i+1));\r\nend\r\ni=find(mrec(2:end)~=mrec(1:end-1))+1;\r\nap=sum((mrec(i)-mrec(i-1)).*mpre(i));\r\n"
  },
  {
    "path": "detectron/datasets/__init__.py",
    "content": ""
  },
  {
    "path": "detectron/datasets/cityscapes_json_dataset_evaluator.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Functions for evaluating results on Cityscapes.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport cv2\nimport logging\nimport os\nimport uuid\n\nimport pycocotools.mask as mask_util\n\nfrom detectron.core.config import cfg\nfrom detectron.datasets.dataset_catalog import get_raw_dir\n\nlogger = logging.getLogger(__name__)\n\n\ndef evaluate_masks(\n    json_dataset,\n    all_boxes,\n    all_segms,\n    output_dir,\n    use_salt=True,\n    cleanup=False\n):\n    if cfg.CLUSTER.ON_CLUSTER:\n        # On the cluster avoid saving these files in the job directory\n        output_dir = '/tmp'\n    res_file = os.path.join(\n        output_dir, 'segmentations_' + json_dataset.name + '_results')\n    if use_salt:\n        res_file += '_{}'.format(str(uuid.uuid4()))\n    res_file += '.json'\n\n    results_dir = os.path.join(output_dir, 'results')\n    if not os.path.exists(results_dir):\n        os.mkdir(results_dir)\n\n    os.environ['CITYSCAPES_DATASET'] = get_raw_dir(json_dataset.name)\n    os.environ['CITYSCAPES_RESULTS'] = output_dir\n\n    # Load the Cityscapes eval script *after* setting the required env vars,\n    # since the script reads their values 
into global variables (at load time).\n    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \\\n        as cityscapes_eval\n\n    roidb = json_dataset.get_roidb()\n    for i, entry in enumerate(roidb):\n        im_name = entry['image']\n\n        basename = os.path.splitext(os.path.basename(im_name))[0]\n        txtname = os.path.join(output_dir, basename + 'pred.txt')\n        with open(txtname, 'w') as fid_txt:\n            if i % 10 == 0:\n                logger.info('i: {}: {}'.format(i, basename))\n            for j in range(1, len(all_segms)):\n                clss = json_dataset.classes[j]\n                clss_id = cityscapes_eval.name2label[clss].id\n                segms = all_segms[j][i]\n                boxes = all_boxes[j][i]\n                if segms == []:\n                    continue\n                masks = mask_util.decode(segms)\n\n                for k in range(boxes.shape[0]):\n                    score = boxes[k, -1]\n                    mask = masks[:, :, k]\n                    pngname = os.path.join(\n                        'results',\n                        basename + '_' + clss + '_{}.png'.format(k))\n                    # write txt\n                    fid_txt.write('{} {} {}\\n'.format(pngname, clss_id, score))\n                    # save mask\n                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)\n    logger.info('Evaluating...')\n    cityscapes_eval.main([])\n    return None\n"
  },
  {
    "path": "detectron/datasets/coco_to_cityscapes_id.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# mapping coco categories to cityscapes (our converted json) id\n# cityscapes\n# INFO roidb.py: 220: 1       bicycle: 7286\n# INFO roidb.py: 220: 2           car: 53684\n# INFO roidb.py: 220: 3        person: 35704\n# INFO roidb.py: 220: 4         train: 336\n# INFO roidb.py: 220: 5         truck: 964\n# INFO roidb.py: 220: 6    motorcycle: 1468\n# INFO roidb.py: 220: 7           bus: 758\n# INFO roidb.py: 220: 8         rider: 3504\n\n# coco (val5k)\n# INFO roidb.py: 220: 1        person: 21296\n# INFO roidb.py: 220: 2       bicycle: 628\n# INFO roidb.py: 220: 3           car: 3818\n# INFO roidb.py: 220: 4    motorcycle: 732\n# INFO roidb.py: 220: 5      airplane: 286 <------ irrelevant\n# INFO roidb.py: 220: 6           bus: 564\n# INFO roidb.py: 220: 7         train: 380\n# INFO roidb.py: 220: 8         truck: 828\n\n\ndef cityscapes_to_coco(cityscapes_id):\n    lookup = {\n        0: 0,  # ... background\n        1: 2,  # bicycle\n        2: 3,  # car\n        3: 1,  # person\n        4: 7,  # train\n        5: 8,  # truck\n        6: 4,  # motorcycle\n        7: 6,  # bus\n        8: -1,  # rider (-1 means rand init)\n    }\n    return lookup[cityscapes_id]\n\n\ndef cityscapes_to_coco_with_rider(cityscapes_id):\n    lookup = {\n        0: 0,  # ... 
background\n        1: 2,  # bicycle\n        2: 3,  # car\n        3: 1,  # person\n        4: 7,  # train\n        5: 8,  # truck\n        6: 4,  # motorcycle\n        7: 6,  # bus\n        8: 1,  # rider (\"person\", *rider has human right!*)\n    }\n    return lookup[cityscapes_id]\n\n\ndef cityscapes_to_coco_without_person_rider(cityscapes_id):\n    lookup = {\n        0: 0,  # ... background\n        1: 2,  # bicycle\n        2: 3,  # car\n        3: -1,  # person (ignore)\n        4: 7,  # train\n        5: 8,  # truck\n        6: 4,  # motorcycle\n        7: 6,  # bus\n        8: -1,  # rider (ignore)\n    }\n    return lookup[cityscapes_id]\n\n\ndef cityscapes_to_coco_all_random(cityscapes_id):\n    lookup = {\n        0: -1,  # ... background\n        1: -1,  # bicycle\n        2: -1,  # car\n        3: -1,  # person (ignore)\n        4: -1,  # train\n        5: -1,  # truck\n        6: -1,  # motorcycle\n        7: -1,  # bus\n        8: -1,  # rider (ignore)\n    }\n    return lookup[cityscapes_id]\n"
  },
  {
    "path": "detectron/datasets/data/README.md",
    "content": "# Setting Up Datasets\n\nThis directory contains symlinks to data locations.\n\n## Creating Symlinks for COCO\n\nSymlink the COCO dataset:\n\n```\nln -s /path/to/coco $DETECTRON/detectron/datasets/data/coco\n```\n\nWe assume that your local COCO dataset copy at `/path/to/coco` has the following directory structure:\n\n```\ncoco\n|_ coco_train2014\n|  |_ <im-1-name>.jpg\n|  |_ ...\n|  |_ <im-N-name>.jpg\n|_ coco_val2014\n|_ ...\n|_ annotations\n   |_ instances_train2014.json\n   |_ ...\n```\n\nIf that is not the case, you may need to do something similar to:\n\n```\nmkdir -p $DETECTRON/detectron/datasets/data/coco\nln -s /path/to/coco_train2014 $DETECTRON/detectron/datasets/data/coco/coco_train2014\nln -s /path/to/coco_val2014 $DETECTRON/detectron/datasets/data/coco/coco_val2014\nln -s /path/to/json/annotations $DETECTRON/detectron/datasets/data/coco/annotations\n```\n\n### COCO Minival Annotations\n\nOur custom `minival` and `valminusminival` annotations are available for download [here](https://dl.fbaipublicfiles.com/detectron/coco/coco_annotations_minival.tgz).\nPlease note that `minival` is exactly equivalent to the recently defined 2017 `val` set.\nSimilarly, the union of `valminusminival` and the 2014 `train` is exactly equivalent to the 2017 `train` set. 
To complete installation of the COCO dataset, you will need to copy the `minival` and `valminusminival` json annotation files to the `coco/annotations` directory referenced above.\n\n## Creating Symlinks for PASCAL VOC\n\nWe assume that your symlinked `detectron/datasets/data/VOC<year>` directory has the following structure:\n\n```\nVOC<year>\n|_ JPEGImages\n|  |_ <im-1-name>.jpg\n|  |_ ...\n|  |_ <im-N-name>.jpg\n|_ annotations\n|  |_ voc_<year>_train.json\n|  |_ voc_<year>_val.json\n|  |_ ...\n|_ VOCdevkit<year>\n```\n\nCreate symlinks for `VOC<year>`:\n\n```\nmkdir -p $DETECTRON/detectron/datasets/data/VOC<year>\nln -s /path/to/VOC<year>/JPEGImages $DETECTRON/detectron/datasets/data/VOC<year>/JPEGImages\nln -s /path/to/VOC<year>/json/annotations $DETECTRON/detectron/datasets/data/VOC<year>/annotations\nln -s /path/to/VOC<year>/devkit $DETECTRON/detectron/datasets/data/VOC<year>/VOCdevkit<year>\n```\n\n### PASCAL VOC Annotations in COCO Format\n\nWe expect PASCAL VOC annotations converted to COCO json format, which are available for download [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip).\n\n## Creating Symlinks for Cityscapes\n\nWe assume that your symlinked `detectron/datasets/data/cityscapes` directory has the following structure:\n\n```\ncityscapes\n|_ images\n|  |_ <im-1-name>.jpg\n|  |_ ...\n|  |_ <im-N-name>.jpg\n|_ annotations\n|  |_ instancesonly_gtFine_train.json\n|  |_ ...\n|_ raw\n   |_ gtFine\n   |_ ...\n   |_ README.md\n```\n\nCreate symlinks for `cityscapes`:\n\n```\nmkdir -p $DETECTRON/detectron/datasets/data/cityscapes\nln -s /path/to/cityscapes/images $DETECTRON/detectron/datasets/data/cityscapes/images\nln -s /path/to/cityscapes/json/annotations $DETECTRON/detectron/datasets/data/cityscapes/annotations\nln -s /path/to/cityscapes/root $DETECTRON/detectron/datasets/data/cityscapes/raw\n```\n\n### Cityscapes Annotations in COCO Format\n\nWe expect Cityscapes annotations converted to COCO json format, which we will 
make available for download soon.\n"
  },
  {
    "path": "detectron/datasets/dataset_catalog.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Collection of available datasets.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport os\n\n\n# Path to data dir\n_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')\n\n# Required dataset entry keys\n_IM_DIR = 'image_directory'\n_ANN_FN = 'annotation_file'\n\n# Optional dataset entry keys\n_IM_PREFIX = 'image_prefix'\n_DEVKIT_DIR = 'devkit_directory'\n_RAW_DIR = 'raw_dir'\n\n# Available datasets\n_DATASETS = {\n    'cityscapes_fine_instanceonly_seg_train': {\n        _IM_DIR:\n            _DATA_DIR + '/cityscapes/images',\n        _ANN_FN:\n            _DATA_DIR + '/cityscapes/annotations/instancesonly_gtFine_train.json',\n        _RAW_DIR:\n            _DATA_DIR + '/cityscapes/raw'\n    },\n    'cityscapes_fine_instanceonly_seg_val': {\n        _IM_DIR:\n            _DATA_DIR + '/cityscapes/images',\n        # use filtered validation as there is an issue converting contours\n        _ANN_FN:\n            _DATA_DIR + '/cityscapes/annotations/instancesonly_filtered_gtFine_val.json',\n        _RAW_DIR:\n            _DATA_DIR + '/cityscapes/raw'\n    },\n    'cityscapes_fine_instanceonly_seg_test': {\n        _IM_DIR:\n            _DATA_DIR + 
'/cityscapes/images',\n        _ANN_FN:\n            _DATA_DIR + '/cityscapes/annotations/instancesonly_gtFine_test.json',\n        _RAW_DIR:\n            _DATA_DIR + '/cityscapes/raw'\n    },\n    'coco_2014_train': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_train2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/instances_train2014.json'\n    },\n    'coco_2014_val': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_val2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/instances_val2014.json'\n    },\n    'coco_2014_minival': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_val2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/instances_minival2014.json'\n    },\n    'coco_2014_valminusminival': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_val2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/instances_valminusminival2014.json'\n    },\n    'coco_2015_test': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_test2015',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/image_info_test2015.json'\n    },\n    'coco_2015_test-dev': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_test2015',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/image_info_test-dev2015.json'\n    },\n    'coco_2017_test': {  # 2017 test uses 2015 test images\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_test2015',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/image_info_test2017.json',\n        _IM_PREFIX:\n            'COCO_test2015_'\n    },\n    'coco_2017_test-dev': {  # 2017 test-dev uses 2015 test images\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_test2015',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/image_info_test-dev2017.json',\n        _IM_PREFIX:\n            'COCO_test2015_'\n    },\n    'coco_stuff_train': {\n        _IM_DIR:\n            _DATA_DIR + 
'/coco/coco_train2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/coco_stuff_train.json'\n    },\n    'coco_stuff_val': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_val2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/coco_stuff_val.json'\n    },\n    'keypoints_coco_2014_train': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_train2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/person_keypoints_train2014.json'\n    },\n    'keypoints_coco_2014_val': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_val2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/person_keypoints_val2014.json'\n    },\n    'keypoints_coco_2014_minival': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_val2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/person_keypoints_minival2014.json'\n    },\n    'keypoints_coco_2014_valminusminival': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_val2014',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/person_keypoints_valminusminival2014.json'\n    },\n    'keypoints_coco_2015_test': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_test2015',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/image_info_test2015.json'\n    },\n    'keypoints_coco_2015_test-dev': {\n        _IM_DIR:\n            _DATA_DIR + '/coco/coco_test2015',\n        _ANN_FN:\n            _DATA_DIR + '/coco/annotations/image_info_test-dev2015.json'\n    },\n    'voc_2007_train': {\n        _IM_DIR:\n            _DATA_DIR + '/VOC2007/JPEGImages',\n        _ANN_FN:\n            _DATA_DIR + '/VOC2007/annotations/voc_2007_train.json',\n        _DEVKIT_DIR:\n            _DATA_DIR + '/VOC2007/VOCdevkit2007'\n    },\n    'voc_2007_val': {\n        _IM_DIR:\n            _DATA_DIR + '/VOC2007/JPEGImages',\n        _ANN_FN:\n            _DATA_DIR + '/VOC2007/annotations/voc_2007_val.json',\n        _DEVKIT_DIR:\n       
     _DATA_DIR + '/VOC2007/VOCdevkit2007'\n    },\n    'voc_2007_test': {\n        _IM_DIR:\n            _DATA_DIR + '/VOC2007/JPEGImages',\n        _ANN_FN:\n            _DATA_DIR + '/VOC2007/annotations/voc_2007_test.json',\n        _DEVKIT_DIR:\n            _DATA_DIR + '/VOC2007/VOCdevkit2007'\n    },\n    'voc_2012_train': {\n        _IM_DIR:\n            _DATA_DIR + '/VOC2012/JPEGImages',\n        _ANN_FN:\n            _DATA_DIR + '/VOC2012/annotations/voc_2012_train.json',\n        _DEVKIT_DIR:\n            _DATA_DIR + '/VOC2012/VOCdevkit2012'\n    },\n    'voc_2012_val': {\n        _IM_DIR:\n            _DATA_DIR + '/VOC2012/JPEGImages',\n        _ANN_FN:\n            _DATA_DIR + '/VOC2012/annotations/voc_2012_val.json',\n        _DEVKIT_DIR:\n            _DATA_DIR + '/VOC2012/VOCdevkit2012'\n    }\n}\n\n\ndef datasets():\n    \"\"\"Retrieve the list of available dataset names.\"\"\"\n    return _DATASETS.keys()\n\n\ndef contains(name):\n    \"\"\"Determine if the dataset is in the catalog.\"\"\"\n    return name in _DATASETS.keys()\n\n\ndef get_im_dir(name):\n    \"\"\"Retrieve the image directory for the dataset.\"\"\"\n    return _DATASETS[name][_IM_DIR]\n\n\ndef get_ann_fn(name):\n    \"\"\"Retrieve the annotation file for the dataset.\"\"\"\n    return _DATASETS[name][_ANN_FN]\n\n\ndef get_im_prefix(name):\n    \"\"\"Retrieve the image prefix for the dataset.\"\"\"\n    return _DATASETS[name][_IM_PREFIX] if _IM_PREFIX in _DATASETS[name] else ''\n\n\ndef get_devkit_dir(name):\n    \"\"\"Retrieve the devkit dir for the dataset.\"\"\"\n    return _DATASETS[name][_DEVKIT_DIR]\n\n\ndef get_raw_dir(name):\n    \"\"\"Retrieve the raw dir for the dataset.\"\"\"\n    return _DATASETS[name][_RAW_DIR]\n"
  },
  {
    "path": "detectron/datasets/dummy_datasets.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\"\"\"Provide stub objects that can act as stand-in \"dummy\" datasets for simple use\ncases, like getting all classes in a dataset. This exists so that demos can be\nrun without requiring users to download/install datasets first.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.utils.collections import AttrDict\n\n\ndef get_coco_dataset():\n    \"\"\"A dummy COCO dataset that includes only the 'classes' field.\"\"\"\n    ds = AttrDict()\n    classes = [\n        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',\n        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',\n        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',\n        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',\n        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',\n        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',\n        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',\n        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',\n        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',\n       
 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',\n        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',\n        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',\n        'scissors', 'teddy bear', 'hair drier', 'toothbrush'\n    ]\n    ds.classes = {i: name for i, name in enumerate(classes)}\n    return ds\n"
  },
  {
    "path": "detectron/datasets/json_dataset.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Representation of the standard COCO json dataset format.\n\nWhen working with a new dataset, we strongly suggest to convert the dataset into\nthe COCO json format and use the existing code; it is not recommended to write\ncode to support new dataset formats.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport copy\nimport logging\nimport numpy as np\nimport os\nimport scipy.sparse\n\n# Must happen before importing COCO API (which imports matplotlib)\nimport detectron.utils.env as envu\nenvu.set_up_matplotlib()\n# COCO API\nfrom pycocotools import mask as COCOmask\nfrom pycocotools.coco import COCO\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.timer import Timer\nimport detectron.datasets.dataset_catalog as dataset_catalog\nimport detectron.utils.boxes as box_utils\nfrom detectron.utils.io import load_object\nimport detectron.utils.segms as segm_utils\n\nlogger = logging.getLogger(__name__)\n\n\nclass JsonDataset:\n    \"\"\"A class representing a COCO json dataset.\"\"\"\n\n    def __init__(self, name):\n        assert dataset_catalog.contains(name), \\\n            'Unknown dataset name: {}'.format(name)\n        assert 
os.path.exists(dataset_catalog.get_im_dir(name)), \\\n            'Im dir \\'{}\\' not found'.format(dataset_catalog.get_im_dir(name))\n        assert os.path.exists(dataset_catalog.get_ann_fn(name)), \\\n            'Ann fn \\'{}\\' not found'.format(dataset_catalog.get_ann_fn(name))\n        logger.debug('Creating: {}'.format(name))\n        self.name = name\n        self.image_directory = dataset_catalog.get_im_dir(name)\n        self.image_prefix = dataset_catalog.get_im_prefix(name)\n        self.COCO = COCO(dataset_catalog.get_ann_fn(name))\n        self.debug_timer = Timer()\n        # Set up dataset classes\n        category_ids = self.COCO.getCatIds()\n        categories = [c['name'] for c in self.COCO.loadCats(category_ids)]\n        self.category_to_id_map = dict(zip(categories, category_ids))\n        self.classes = ['__background__'] + categories\n        self.num_classes = len(self.classes)\n        self.json_category_id_to_contiguous_id = {\n            v: i + 1\n            for i, v in enumerate(self.COCO.getCatIds())\n        }\n        self.contiguous_category_id_to_json_id = {\n            v: k\n            for k, v in self.json_category_id_to_contiguous_id.items()\n        }\n        self._init_keypoints()\n\n    def get_roidb(\n        self,\n        gt=False,\n        proposal_file=None,\n        min_proposal_size=2,\n        proposal_limit=-1,\n        crowd_filter_thresh=0\n    ):\n        \"\"\"Return an roidb corresponding to the json dataset. 
Optionally:\n           - include ground truth boxes in the roidb\n           - add proposals specified in a proposals file\n           - filter proposals based on a minimum side length\n           - filter proposals that intersect with crowd regions\n        \"\"\"\n        assert gt is True or crowd_filter_thresh == 0, \\\n            'Crowd filter threshold must be 0 if ground-truth annotations ' \\\n            'are not included.'\n        image_ids = self.COCO.getImgIds()\n        image_ids.sort()\n        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))\n        for entry in roidb:\n            self._prep_roidb_entry(entry)\n        if gt:\n            # Include ground-truth object annotations\n            self.debug_timer.tic()\n            for entry in roidb:\n                self._add_gt_annotations(entry)\n            logger.debug(\n                '_add_gt_annotations took {:.3f}s'.\n                format(self.debug_timer.toc(average=False))\n            )\n        if proposal_file is not None:\n            # Include proposals from a file\n            self.debug_timer.tic()\n            self._add_proposals_from_file(\n                roidb, proposal_file, min_proposal_size, proposal_limit,\n                crowd_filter_thresh\n            )\n            logger.debug(\n                '_add_proposals_from_file took {:.3f}s'.\n                format(self.debug_timer.toc(average=False))\n            )\n        _add_class_assignments(roidb)\n        return roidb\n\n    def _prep_roidb_entry(self, entry):\n        \"\"\"Adds empty metadata fields to an roidb entry.\"\"\"\n        # Reference back to the parent dataset\n        entry['dataset'] = self\n        # Make file_name an abs path\n        im_path = os.path.join(\n            self.image_directory, self.image_prefix + entry['file_name']\n        )\n        assert os.path.exists(im_path), 'Image \\'{}\\' not found'.format(im_path)\n        entry['image'] = im_path\n        entry['flipped'] = False\n 
       entry['has_visible_keypoints'] = False\n        # Empty placeholders\n        entry['boxes'] = np.empty((0, 4), dtype=np.float32)\n        entry['segms'] = []\n        entry['gt_classes'] = np.empty((0), dtype=np.int32)\n        entry['seg_areas'] = np.empty((0), dtype=np.float32)\n        entry['gt_overlaps'] = scipy.sparse.csr_matrix(\n            np.empty((0, self.num_classes), dtype=np.float32)\n        )\n        entry['is_crowd'] = np.empty((0), dtype=bool)\n        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the index\n        # in the list of rois that satisfy np.where(entry['gt_classes'] > 0)\n        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)\n        if self.keypoints is not None:\n            entry['gt_keypoints'] = np.empty(\n                (0, 3, self.num_keypoints), dtype=np.int32\n            )\n        # Remove unwanted fields that come from the json file (if they exist)\n        for k in ['date_captured', 'url', 'license', 'file_name']:\n            if k in entry:\n                del entry[k]\n\n    def _add_gt_annotations(self, entry):\n        \"\"\"Add ground truth annotation metadata to an roidb entry.\"\"\"\n        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)\n        objs = self.COCO.loadAnns(ann_ids)\n        # Sanitize bboxes -- some are invalid\n        valid_objs = []\n        valid_segms = []\n        width = entry['width']\n        height = entry['height']\n        for obj in objs:\n            # crowd regions are RLE encoded\n            if segm_utils.is_poly(obj['segmentation']):\n                # Valid polygons have >= 3 points, so require >= 6 coordinates\n                obj['segmentation'] = [\n                    p for p in obj['segmentation'] if len(p) >= 6\n                ]\n            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:\n                continue\n            if 'ignore' in obj and obj['ignore'] == 1:\n                continue\n            # Convert form 
(x1, y1, w, h) to (x1, y1, x2, y2)\n            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])\n            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(\n                x1, y1, x2, y2, height, width\n            )\n            # Require non-zero seg area and more than 1x1 box size\n            if obj['area'] > 0 and x2 > x1 and y2 > y1:\n                obj['clean_bbox'] = [x1, y1, x2, y2]\n                valid_objs.append(obj)\n                valid_segms.append(obj['segmentation'])\n        num_valid_objs = len(valid_objs)\n\n        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)\n        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)\n        gt_overlaps = np.zeros(\n            (num_valid_objs, self.num_classes),\n            dtype=entry['gt_overlaps'].dtype\n        )\n        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)\n        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)\n        box_to_gt_ind_map = np.zeros(\n            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype\n        )\n        if self.keypoints is not None:\n            gt_keypoints = np.zeros(\n                (num_valid_objs, 3, self.num_keypoints),\n                dtype=entry['gt_keypoints'].dtype\n            )\n\n        im_has_visible_keypoints = False\n        for ix, obj in enumerate(valid_objs):\n            cls = self.json_category_id_to_contiguous_id[obj['category_id']]\n            boxes[ix, :] = obj['clean_bbox']\n            gt_classes[ix] = cls\n            seg_areas[ix] = obj['area']\n            is_crowd[ix] = obj['iscrowd']\n            box_to_gt_ind_map[ix] = ix\n            if self.keypoints is not None:\n                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)\n                if np.sum(gt_keypoints[ix, 2, :]) > 0:\n                    im_has_visible_keypoints = True\n            if obj['iscrowd']:\n                # Set overlap to -1 for all classes 
for crowd objects\n                # so they will be excluded during training\n                gt_overlaps[ix, :] = -1.0\n            else:\n                gt_overlaps[ix, cls] = 1.0\n        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)\n        entry['segms'].extend(valid_segms)\n        # To match the original implementation:\n        # entry['boxes'] = np.append(\n        #     entry['boxes'], boxes.astype(int).astype(float), axis=0)\n        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)\n        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)\n        entry['gt_overlaps'] = np.append(\n            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0\n        )\n        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])\n        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)\n        entry['box_to_gt_ind_map'] = np.append(\n            entry['box_to_gt_ind_map'], box_to_gt_ind_map\n        )\n        if self.keypoints is not None:\n            entry['gt_keypoints'] = np.append(\n                entry['gt_keypoints'], gt_keypoints, axis=0\n            )\n            entry['has_visible_keypoints'] = im_has_visible_keypoints\n\n    def _add_proposals_from_file(\n        self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh\n    ):\n        \"\"\"Add proposals from a proposals file to an roidb.\"\"\"\n        logger.info('Loading proposals from: {}'.format(proposal_file))\n        proposals = load_object(proposal_file)\n\n        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix\n\n        _remove_proposals_not_in_roidb(proposals, roidb, id_field)\n        _sort_proposals(proposals, id_field)\n        box_list = []\n        for i, entry in enumerate(roidb):\n            if i % 2500 == 0:\n                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))\n            boxes = proposals['boxes'][i]\n            # Sanity check that these boxes are for the 
correct image id\n            assert entry['id'] == proposals[id_field][i]\n            # Remove duplicate boxes and very small boxes and then take top k\n            boxes = box_utils.clip_boxes_to_image(\n                boxes, entry['height'], entry['width']\n            )\n            keep = box_utils.unique_boxes(boxes)\n            boxes = boxes[keep, :]\n            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)\n            boxes = boxes[keep, :]\n            if top_k > 0:\n                boxes = boxes[:top_k, :]\n            box_list.append(boxes)\n        _merge_proposal_boxes_into_roidb(roidb, box_list)\n        if crowd_thresh > 0:\n            _filter_crowd_proposals(roidb, crowd_thresh)\n\n    def _init_keypoints(self):\n        \"\"\"Initialize COCO keypoint information.\"\"\"\n        self.keypoints = None\n        self.keypoint_flip_map = None\n        self.keypoints_to_id_map = None\n        self.num_keypoints = 0\n        # Thus far only the 'person' category has keypoints\n        if 'person' in self.category_to_id_map:\n            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])\n        else:\n            return\n\n        # Check if the annotations contain keypoint data or not\n        if 'keypoints' in cat_info[0]:\n            keypoints = cat_info[0]['keypoints']\n            self.keypoints_to_id_map = dict(\n                zip(keypoints, range(len(keypoints))))\n            self.keypoints = keypoints\n            self.num_keypoints = len(keypoints)\n            self.keypoint_flip_map = {\n                'left_eye': 'right_eye',\n                'left_ear': 'right_ear',\n                'left_shoulder': 'right_shoulder',\n                'left_elbow': 'right_elbow',\n                'left_wrist': 'right_wrist',\n                'left_hip': 'right_hip',\n                'left_knee': 'right_knee',\n                'left_ankle': 'right_ankle'}\n\n    def _get_gt_keypoints(self, obj):\n        \"\"\"Return 
ground truth keypoints.\"\"\"\n        if 'keypoints' not in obj:\n            return None\n        kp = np.array(obj['keypoints'])\n        x = kp[0::3]  # 0-indexed x coordinates\n        y = kp[1::3]  # 0-indexed y coordinates\n        # 0: not labeled; 1: labeled, not inside mask;\n        # 2: labeled and inside mask\n        v = kp[2::3]\n        num_keypoints = len(obj['keypoints']) / 3\n        assert num_keypoints == self.num_keypoints\n        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)\n        for i in range(self.num_keypoints):\n            gt_kps[0, i] = x[i]\n            gt_kps[1, i] = y[i]\n            gt_kps[2, i] = v[i]\n        return gt_kps\n\n\ndef add_proposals(roidb, rois, scales, crowd_thresh):\n    \"\"\"Add proposal boxes (rois) to an roidb that has ground-truth annotations\n    but no proposals. If the proposals are not at the original image scale,\n    specify the scale factors that separate them in scales.\n    \"\"\"\n    box_list = []\n    for i in range(len(roidb)):\n        inv_im_scale = 1. / scales[i]\n        idx = np.where(rois[:, 0] == i)[0]\n        box_list.append(rois[idx, 1:] * inv_im_scale)\n    _merge_proposal_boxes_into_roidb(roidb, box_list)\n    if crowd_thresh > 0:\n        _filter_crowd_proposals(roidb, crowd_thresh)\n    _add_class_assignments(roidb)\n\n\ndef _merge_proposal_boxes_into_roidb(roidb, box_list):\n    \"\"\"Add proposal boxes to each roidb entry.\"\"\"\n    assert len(box_list) == len(roidb)\n    for i, entry in enumerate(roidb):\n        boxes = box_list[i]\n        num_boxes = boxes.shape[0]\n        gt_overlaps = np.zeros(\n            (num_boxes, entry['gt_overlaps'].shape[1]),\n            dtype=entry['gt_overlaps'].dtype\n        )\n        box_to_gt_ind_map = -np.ones(\n            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype\n        )\n\n        # Note: unlike in other places, here we intentionally include all gt\n        # rois, even ones marked as crowd. 
Boxes that overlap with crowds will\n        # be filtered out later (see: _filter_crowd_proposals).\n        gt_inds = np.where(entry['gt_classes'] > 0)[0]\n        if len(gt_inds) > 0:\n            gt_boxes = entry['boxes'][gt_inds, :]\n            gt_classes = entry['gt_classes'][gt_inds]\n            proposal_to_gt_overlaps = box_utils.bbox_overlaps(\n                boxes.astype(dtype=np.float32, copy=False),\n                gt_boxes.astype(dtype=np.float32, copy=False)\n            )\n            # Gt box that overlaps each input box the most\n            # (ties are broken arbitrarily by class order)\n            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)\n            # Amount of that overlap\n            maxes = proposal_to_gt_overlaps.max(axis=1)\n            # Those boxes with non-zero overlap with gt boxes\n            I = np.where(maxes > 0)[0]\n            # Record max overlaps with the class of the appropriate gt box\n            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]\n            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]\n        entry['boxes'] = np.append(\n            entry['boxes'],\n            boxes.astype(entry['boxes'].dtype, copy=False),\n            axis=0\n        )\n        entry['gt_classes'] = np.append(\n            entry['gt_classes'],\n            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype)\n        )\n        entry['seg_areas'] = np.append(\n            entry['seg_areas'],\n            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype)\n        )\n        entry['gt_overlaps'] = np.append(\n            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0\n        )\n        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])\n        entry['is_crowd'] = np.append(\n            entry['is_crowd'],\n            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype)\n        )\n        entry['box_to_gt_ind_map'] = np.append(\n            entry['box_to_gt_ind_map'],\n            
box_to_gt_ind_map.astype(\n                entry['box_to_gt_ind_map'].dtype, copy=False\n            )\n        )\n\n\ndef _filter_crowd_proposals(roidb, crowd_thresh):\n    \"\"\"Finds proposals that are inside crowd regions and marks them as\n    overlap = -1 with each ground-truth rois, which means they will be excluded\n    from training.\n    \"\"\"\n    for entry in roidb:\n        gt_overlaps = entry['gt_overlaps'].toarray()\n        crowd_inds = np.where(entry['is_crowd'] == 1)[0]\n        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]\n        if len(crowd_inds) == 0 or len(non_gt_inds) == 0:\n            continue\n        crowd_boxes = box_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :])\n        non_gt_boxes = box_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :])\n        iscrowd_flags = [int(True)] * len(crowd_inds)\n        ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd_flags)\n        bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0]\n        gt_overlaps[non_gt_inds[bad_inds], :] = -1\n        entry['gt_overlaps'] = scipy.sparse.csr_matrix(gt_overlaps)\n\n\ndef _add_class_assignments(roidb):\n    \"\"\"Compute object category assignment for each box associated with each\n    roidb entry.\n    \"\"\"\n    for entry in roidb:\n        gt_overlaps = entry['gt_overlaps'].toarray()\n        # max overlap with gt over classes (columns)\n        max_overlaps = gt_overlaps.max(axis=1)\n        # gt class that had the max overlap\n        max_classes = gt_overlaps.argmax(axis=1)\n        entry['max_classes'] = max_classes\n        entry['max_overlaps'] = max_overlaps\n        # sanity checks\n        # if max overlap is 0, the class must be background (class 0)\n        zero_inds = np.where(max_overlaps == 0)[0]\n        assert all(max_classes[zero_inds] == 0)\n        # if max overlap > 0, the class must be a fg class (not class 0)\n        nonzero_inds = np.where(max_overlaps > 0)[0]\n        assert all(max_classes[nonzero_inds] != 
0)\n\n\ndef _sort_proposals(proposals, id_field):\n    \"\"\"Sort proposals by the specified id field.\"\"\"\n    order = np.argsort(proposals[id_field])\n    fields_to_sort = ['boxes', id_field, 'scores']\n    for k in fields_to_sort:\n        proposals[k] = [proposals[k][i] for i in order]\n\n\ndef _remove_proposals_not_in_roidb(proposals, roidb, id_field):\n    # fix proposals so they don't contain entries for images not in the roidb\n    roidb_ids = set({entry[\"id\"] for entry in roidb})\n    keep = [i for i, id in enumerate(proposals[id_field]) if id in roidb_ids]\n    for f in ['boxes', id_field, 'scores']:\n        proposals[f] = [proposals[f][i] for i in keep]\n"
  },
  {
    "path": "detectron/datasets/json_dataset_evaluator.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Functions for evaluating results computed for a json dataset.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport json\nimport logging\nimport numpy as np\nimport os\nimport six\nimport uuid\n\nfrom pycocotools.cocoeval import COCOeval\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.io import save_object\nimport detectron.utils.boxes as box_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef evaluate_masks(\n    json_dataset,\n    all_boxes,\n    all_segms,\n    output_dir,\n    use_salt=True,\n    cleanup=False\n):\n    res_file = os.path.join(\n        output_dir, 'segmentations_' + json_dataset.name + '_results'\n    )\n    if use_salt:\n        res_file += '_{}'.format(str(uuid.uuid4()))\n    res_file += '.json'\n    _write_coco_segms_results_file(\n        json_dataset, all_boxes, all_segms, res_file)\n    # Only do evaluation on non-test sets (annotations are undisclosed on test)\n    if json_dataset.name.find('test') == -1:\n        coco_eval = _do_segmentation_eval(json_dataset, res_file, output_dir)\n    else:\n        logger.warning(\n            '{} eval ignored as annotations are undisclosed on test: {} ignored'\n        
    .format(\"Segmentation\", json_dataset.name)\n        )\n        coco_eval = None\n    # Optionally cleanup results json file\n    if cleanup:\n        os.remove(res_file)\n    return coco_eval\n\n\ndef _write_coco_segms_results_file(\n    json_dataset, all_boxes, all_segms, res_file\n):\n    # [{\"image_id\": 42,\n    #   \"category_id\": 18,\n    #   \"segmentation\": [...],\n    #   \"score\": 0.236}, ...]\n    results = []\n    for cls_ind, cls in enumerate(json_dataset.classes):\n        if cls == '__background__':\n            continue\n        if cls_ind >= len(all_boxes):\n            break\n        cat_id = json_dataset.category_to_id_map[cls]\n        results.extend(_coco_segms_results_one_category(\n            json_dataset, all_boxes[cls_ind], all_segms[cls_ind], cat_id))\n    logger.info(\n        'Writing segmentation results json to: {}'.format(\n            os.path.abspath(res_file)))\n    with open(res_file, 'w') as fid:\n        # \"counts\" is an array encoded by mask_util as a byte-stream. Python3's\n        # json writer which /always produces strings/ cannot serialize a bytestream\n        # unless you decode it. 
Thankfully, utf-8 works out (which is also what\n        # the pycocotools/_mask.pyx does).\n        if six.PY3:\n            for r in results:\n                rle = r['segmentation']\n                if 'counts' in rle:\n                    rle['counts'] = rle['counts'].decode(\"utf8\")\n\n        json.dump(results, fid)\n\n\ndef _coco_segms_results_one_category(json_dataset, boxes, segms, cat_id):\n    results = []\n    image_ids = json_dataset.COCO.getImgIds()\n    image_ids.sort()\n    assert len(boxes) == len(image_ids)\n    assert len(segms) == len(image_ids)\n    for i, image_id in enumerate(image_ids):\n        dets = boxes[i]\n        rles = segms[i]\n\n        if isinstance(dets, list) and len(dets) == 0:\n            continue\n\n        dets = dets.astype(float)\n        scores = dets[:, -1]\n\n        results.extend(\n            [{'image_id': image_id,\n              'category_id': cat_id,\n              'segmentation': rles[k],\n              'score': scores[k]}\n              for k in range(dets.shape[0])])\n\n    return results\n\n\ndef _do_segmentation_eval(json_dataset, res_file, output_dir):\n    coco_dt = json_dataset.COCO.loadRes(str(res_file))\n    coco_eval = COCOeval(json_dataset.COCO, coco_dt, 'segm')\n    coco_eval.evaluate()\n    coco_eval.accumulate()\n    _log_detection_eval_metrics(json_dataset, coco_eval)\n    eval_file = os.path.join(output_dir, 'segmentation_results.pkl')\n    save_object(coco_eval, eval_file)\n    logger.info('Wrote json eval results to: {}'.format(eval_file))\n    return coco_eval\n\n\ndef evaluate_boxes(\n    json_dataset, all_boxes, output_dir, use_salt=True, cleanup=False\n):\n    res_file = os.path.join(\n        output_dir, 'bbox_' + json_dataset.name + '_results'\n    )\n    if use_salt:\n        res_file += '_{}'.format(str(uuid.uuid4()))\n    res_file += '.json'\n    _write_coco_bbox_results_file(json_dataset, all_boxes, res_file)\n    # Only do evaluation on non-test sets (annotations are undisclosed on 
test)\n    if json_dataset.name.find('test') == -1:\n        coco_eval = _do_detection_eval(json_dataset, res_file, output_dir)\n    else:\n        logger.warning(\n            '{} eval ignored as annotations are undisclosed on test: {} ignored'\n            .format(\"Bbox\", json_dataset.name)\n        )\n        coco_eval = None\n    # Optionally cleanup results json file\n    if cleanup:\n        os.remove(res_file)\n    return coco_eval\n\n\ndef _write_coco_bbox_results_file(json_dataset, all_boxes, res_file):\n    # [{\"image_id\": 42,\n    #   \"category_id\": 18,\n    #   \"bbox\": [258.15,41.29,348.26,243.78],\n    #   \"score\": 0.236}, ...]\n    results = []\n    for cls_ind, cls in enumerate(json_dataset.classes):\n        if cls == '__background__':\n            continue\n        if cls_ind >= len(all_boxes):\n            break\n        cat_id = json_dataset.category_to_id_map[cls]\n        results.extend(_coco_bbox_results_one_category(\n            json_dataset, all_boxes[cls_ind], cat_id))\n    logger.info(\n        'Writing bbox results json to: {}'.format(os.path.abspath(res_file)))\n    with open(res_file, 'w') as fid:\n        json.dump(results, fid)\n\n\ndef _coco_bbox_results_one_category(json_dataset, boxes, cat_id):\n    results = []\n    image_ids = json_dataset.COCO.getImgIds()\n    image_ids.sort()\n    assert len(boxes) == len(image_ids)\n    for i, image_id in enumerate(image_ids):\n        dets = boxes[i]\n        if isinstance(dets, list) and len(dets) == 0:\n            continue\n        dets = dets.astype(float)\n        scores = dets[:, -1]\n        xywh_dets = box_utils.xyxy_to_xywh(dets[:, 0:4])\n        xs = xywh_dets[:, 0]\n        ys = xywh_dets[:, 1]\n        ws = xywh_dets[:, 2]\n        hs = xywh_dets[:, 3]\n        results.extend(\n            [{'image_id': image_id,\n              'category_id': cat_id,\n              'bbox': [xs[k], ys[k], ws[k], hs[k]],\n              'score': scores[k]} for k in range(dets.shape[0])])\n 
   return results\n\n\ndef _do_detection_eval(json_dataset, res_file, output_dir):\n    coco_dt = json_dataset.COCO.loadRes(str(res_file))\n    coco_eval = COCOeval(json_dataset.COCO, coco_dt, 'bbox')\n    coco_eval.evaluate()\n    coco_eval.accumulate()\n    _log_detection_eval_metrics(json_dataset, coco_eval)\n    eval_file = os.path.join(output_dir, 'detection_results.pkl')\n    save_object(coco_eval, eval_file)\n    logger.info('Wrote json eval results to: {}'.format(eval_file))\n    return coco_eval\n\n\ndef _log_detection_eval_metrics(json_dataset, coco_eval):\n    def _get_thr_ind(coco_eval, thr):\n        ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &\n                       (coco_eval.params.iouThrs < thr + 1e-5))[0][0]\n        iou_thr = coco_eval.params.iouThrs[ind]\n        assert np.isclose(iou_thr, thr)\n        return ind\n\n    IoU_lo_thresh = 0.5\n    IoU_hi_thresh = 0.95\n    ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)\n    ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)\n    # precision has dims (iou, recall, cls, area range, max dets)\n    # area range index 0: all area ranges\n    # max dets index 2: 100 per image\n    precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]\n    ap_default = np.mean(precision[precision > -1])\n    logger.info(\n        '~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ~~~~'.format(\n            IoU_lo_thresh, IoU_hi_thresh))\n    logger.info('{:.1f}'.format(100 * ap_default))\n    for cls_ind, cls in enumerate(json_dataset.classes):\n        if cls == '__background__':\n            continue\n        # minus 1 because of __background__\n        precision = coco_eval.eval['precision'][\n            ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]\n        ap = np.mean(precision[precision > -1])\n        logger.info('{:.1f}'.format(100 * ap))\n    logger.info('~~~~ Summary metrics ~~~~')\n    coco_eval.summarize()\n\n\ndef evaluate_box_proposals(\n    json_dataset, roidb, 
thresholds=None, area='all', limit=None, class_specific=False\n):\n    \"\"\"Evaluate detection proposal recall metrics. This function is a much\n    faster alternative to the official COCO API recall evaluation code. However,\n    it produces slightly different results.\n    \"\"\"\n    # Record max overlap value for each gt box\n    # Return vector of overlap values\n    areas = {\n        'all': 0,\n        'small': 1,\n        'medium': 2,\n        'large': 3,\n        '96-128': 4,\n        '128-256': 5,\n        '256-512': 6,\n        '512-inf': 7}\n    area_ranges = [\n        [0**2, 1e5**2],    # all\n        [0**2, 32**2],     # small\n        [32**2, 96**2],    # medium\n        [96**2, 1e5**2],   # large\n        [96**2, 128**2],   # 96-128\n        [128**2, 256**2],  # 128-256\n        [256**2, 512**2],  # 256-512\n        [512**2, 1e5**2]]  # 512-inf\n    assert area in areas, 'Unknown area range: {}'.format(area)\n    area_range = area_ranges[areas[area]]\n    gt_overlaps = np.zeros(0)\n    gt_classes = np.zeros(0)\n    num_pos = 0\n    for entry in roidb:\n        gt_inds = np.where(\n            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]\n        gt_boxes = entry['boxes'][gt_inds, :]\n        gt_areas = entry['seg_areas'][gt_inds]\n        valid_gt_inds = np.where(\n            (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0]\n        gt_boxes = gt_boxes[valid_gt_inds, :]\n        _gt_classes = entry[\"gt_classes\"][valid_gt_inds]\n        assert gt_boxes.shape[0] == _gt_classes.shape[0]\n        gt_classes = np.hstack((gt_classes, _gt_classes))\n        num_pos += len(valid_gt_inds)\n        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]\n        boxes = entry['boxes'][non_gt_inds, :]\n        if boxes.shape[0] == 0:\n            continue\n        if limit is not None and boxes.shape[0] > limit:\n            boxes = boxes[:limit, :]\n        overlaps = box_utils.bbox_overlaps(\n            
boxes.astype(dtype=np.float32, copy=False),\n            gt_boxes.astype(dtype=np.float32, copy=False))\n        _gt_overlaps = np.zeros((gt_boxes.shape[0]))\n        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):\n            # find which proposal box maximally covers each gt box\n            argmax_overlaps = overlaps.argmax(axis=0)\n            # and get the iou amount of coverage for each gt box\n            max_overlaps = overlaps.max(axis=0)\n            # find which gt box is 'best' covered (i.e. 'best' = most iou)\n            gt_ind = max_overlaps.argmax()\n            gt_ovr = max_overlaps.max()\n            assert gt_ovr >= 0\n            # find the proposal box that covers the best covered gt box\n            box_ind = argmax_overlaps[gt_ind]\n            # record the iou coverage of this gt box\n            _gt_overlaps[j] = overlaps[box_ind, gt_ind]\n            assert _gt_overlaps[j] == gt_ovr\n            # mark the proposal box and the gt box as used\n            overlaps[box_ind, :] = -1\n            overlaps[:, gt_ind] = -1\n        # append recorded iou coverage level\n        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))\n\n    if thresholds is None:\n        step = 0.05\n        thresholds = np.arange(0.5, 0.95 + 1e-5, step)\n\n    if not class_specific:\n        gt_overlaps = np.sort(gt_overlaps)\n        recalls = np.zeros_like(thresholds)\n        # compute recall for each iou threshold\n        for i, t in enumerate(thresholds):\n            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)\n        ar = recalls.mean()\n        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,\n                'gt_overlaps': gt_overlaps, 'num_pos': num_pos}\n    else:\n        gt_classes_unique = np.unique(gt_classes)\n        recalls = np.zeros((gt_classes_unique.shape[0], thresholds.shape[0]))\n        # compute recall for each category and each iou threshold\n        for i, category_id in 
enumerate(gt_classes_unique):\n            inds = (gt_classes == category_id)\n            num_pos_per_category = float(inds.sum())\n            for j, thresh in enumerate(thresholds):\n                recalls[i][j] = (\n                    gt_overlaps[inds] >= thresh\n                ).sum() / num_pos_per_category\n        ar = recalls.mean(axis=1).mean()\n        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,\n                'gt_overlaps': gt_overlaps, 'num_pos': num_pos}\n\ndef evaluate_keypoints(\n    json_dataset,\n    all_boxes,\n    all_keypoints,\n    output_dir,\n    use_salt=True,\n    cleanup=False\n):\n    res_file = os.path.join(\n        output_dir, 'keypoints_' + json_dataset.name + '_results'\n    )\n    if use_salt:\n        res_file += '_{}'.format(str(uuid.uuid4()))\n    res_file += '.json'\n    _write_coco_keypoint_results_file(\n        json_dataset, all_boxes, all_keypoints, res_file)\n    # Only do evaluation on non-test sets (annotations are undisclosed on test)\n    if json_dataset.name.find('test') == -1:\n        coco_eval = _do_keypoint_eval(json_dataset, res_file, output_dir)\n    else:\n        logger.warning(\n            '{} eval ignored as annotations are undisclosed on test: {} ignored'\n            .format(\"Keypoints\", json_dataset.name)\n        )\n        coco_eval = None\n    # Optionally cleanup results json file\n    if cleanup:\n        os.remove(res_file)\n    return coco_eval\n\n\ndef _write_coco_keypoint_results_file(\n    json_dataset, all_boxes, all_keypoints, res_file\n):\n    results = []\n    for cls_ind, cls in enumerate(json_dataset.classes):\n        if cls == '__background__':\n            continue\n        if cls_ind >= len(all_keypoints):\n            break\n        logger.info(\n            'Collecting {} results ({:d}/{:d})'.format(\n                cls, cls_ind, len(all_keypoints) - 1))\n        cat_id = json_dataset.category_to_id_map[cls]\n        
results.extend(_coco_kp_results_one_category(\n            json_dataset, all_boxes[cls_ind], all_keypoints[cls_ind], cat_id))\n    logger.info(\n        'Writing keypoint results json to: {}'.format(\n            os.path.abspath(res_file)))\n    with open(res_file, 'w') as fid:\n        json.dump(results, fid)\n\n\ndef _coco_kp_results_one_category(json_dataset, boxes, kps, cat_id):\n    results = []\n    image_ids = json_dataset.COCO.getImgIds()\n    image_ids.sort()\n    assert len(kps) == len(image_ids)\n    assert len(boxes) == len(image_ids)\n    use_box_score = False\n    if cfg.KRCNN.KEYPOINT_CONFIDENCE == 'logit':\n        # This is ugly; see utils.keypoints.heatmap_to_keypoints for the magic\n        # indexes\n        score_index = 2\n    elif cfg.KRCNN.KEYPOINT_CONFIDENCE == 'prob':\n        score_index = 3\n    elif cfg.KRCNN.KEYPOINT_CONFIDENCE == 'bbox':\n        use_box_score = True\n    else:\n        raise ValueError(\n            'KRCNN.KEYPOINT_CONFIDENCE must be \"logit\", \"prob\", or \"bbox\"')\n    for i, image_id in enumerate(image_ids):\n        if len(boxes[i]) == 0:\n            continue\n        kps_dets = kps[i]\n        scores = boxes[i][:, -1].astype(float)\n        if len(kps_dets) == 0:\n            continue\n        for j in range(len(kps_dets)):\n            xy = []\n\n            kps_score = 0\n            for k in range(kps_dets[j].shape[1]):\n                xy.append(float(kps_dets[j][0, k]))\n                xy.append(float(kps_dets[j][1, k]))\n                xy.append(1)\n                if not use_box_score:\n                    kps_score += kps_dets[j][score_index, k]\n\n            if use_box_score:\n                kps_score = scores[j]\n            else:\n                kps_score /= kps_dets[j].shape[1]\n\n            results.extend([{'image_id': image_id,\n                             'category_id': cat_id,\n                             'keypoints': xy,\n                             'score': kps_score}])\n    return 
results\n\n\ndef _do_keypoint_eval(json_dataset, res_file, output_dir):\n    ann_type = 'keypoints'\n    imgIds = json_dataset.COCO.getImgIds()\n    imgIds.sort()\n    coco_dt = json_dataset.COCO.loadRes(res_file)\n    coco_eval = COCOeval(json_dataset.COCO, coco_dt, ann_type)\n    coco_eval.params.imgIds = imgIds\n    coco_eval.evaluate()\n    coco_eval.accumulate()\n    eval_file = os.path.join(output_dir, 'keypoint_results.pkl')\n    save_object(coco_eval, eval_file)\n    logger.info('Wrote json eval results to: {}'.format(eval_file))\n    coco_eval.summarize()\n    return coco_eval\n"
  },
  {
    "path": "detectron/datasets/roidb.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Functions for common roidb manipulations.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom past.builtins import basestring\nimport logging\nimport numpy as np\n\nfrom detectron.core.config import cfg\nfrom detectron.datasets.json_dataset import JsonDataset\nimport detectron.utils.boxes as box_utils\nimport detectron.utils.keypoints as keypoint_utils\nimport detectron.utils.segms as segm_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef combined_roidb_for_training(dataset_names, proposal_files):\n    \"\"\"Load and concatenate roidbs for one or more datasets, along with optional\n    object proposals. 
The roidb entries are then prepared for use in training,\n    which involves caching certain types of metadata for each roidb entry.\n    \"\"\"\n    def get_roidb(dataset_name, proposal_file):\n        ds = JsonDataset(dataset_name)\n        roidb = ds.get_roidb(\n            gt=True,\n            proposal_file=proposal_file,\n            crowd_filter_thresh=cfg.TRAIN.CROWD_FILTER_THRESH\n        )\n        if cfg.TRAIN.USE_FLIPPED:\n            logger.info('Appending horizontally-flipped training examples...')\n            extend_with_flipped_entries(roidb, ds)\n        logger.info('Loaded dataset: {:s}'.format(ds.name))\n        return roidb\n\n    if isinstance(dataset_names, basestring):\n        dataset_names = (dataset_names, )\n    if isinstance(proposal_files, basestring):\n        proposal_files = (proposal_files, )\n    if len(proposal_files) == 0:\n        proposal_files = (None, ) * len(dataset_names)\n    assert len(dataset_names) == len(proposal_files)\n    roidbs = [get_roidb(*args) for args in zip(dataset_names, proposal_files)]\n    roidb = roidbs[0]\n    for r in roidbs[1:]:\n        roidb.extend(r)\n    roidb = filter_for_training(roidb)\n\n    logger.info('Computing bounding-box regression targets...')\n    add_bbox_regression_targets(roidb)\n    logger.info('done')\n\n    _compute_and_log_stats(roidb)\n\n    return roidb\n\n\ndef extend_with_flipped_entries(roidb, dataset):\n    \"\"\"Flip each entry in the given roidb and append the flipped entries to\n    the given roidb in place (nothing is returned).\n\n    \"Flipping\" an entry means that the image and associated metadata (e.g.,\n    ground truth boxes and object proposals) are horizontally flipped.\n    \"\"\"\n    flipped_roidb = []\n    for entry in roidb:\n        width = entry['width']\n        boxes = entry['boxes'].copy()\n        oldx1 = boxes[:, 0].copy()\n        oldx2 = boxes[:, 2].copy()\n        boxes[:, 0] = width - oldx2 - 1\n        boxes[:, 2] = width - 
oldx1 - 1\n        assert (boxes[:, 2] >= boxes[:, 0]).all()\n        flipped_entry = {}\n        dont_copy = ('boxes', 'segms', 'gt_keypoints', 'flipped')\n        for k, v in entry.items():\n            if k not in dont_copy:\n                flipped_entry[k] = v\n        flipped_entry['boxes'] = boxes\n        flipped_entry['segms'] = segm_utils.flip_segms(\n            entry['segms'], entry['height'], entry['width']\n        )\n        if dataset.keypoints is not None:\n            flipped_entry['gt_keypoints'] = keypoint_utils.flip_keypoints(\n                dataset.keypoints, dataset.keypoint_flip_map,\n                entry['gt_keypoints'], entry['width']\n            )\n        flipped_entry['flipped'] = True\n        flipped_roidb.append(flipped_entry)\n    roidb.extend(flipped_roidb)\n\n\ndef filter_for_training(roidb):\n    \"\"\"Remove roidb entries that have no usable RoIs based on config settings.\n    \"\"\"\n    def is_valid(entry):\n        # Valid images have:\n        #   (1) At least one foreground RoI OR\n        #   (2) At least one background RoI\n        overlaps = entry['max_overlaps']\n        # find boxes with sufficient overlap\n        fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]\n        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)\n        bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &\n                           (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]\n        # image is only valid if such boxes exist\n        valid = len(fg_inds) > 0 or len(bg_inds) > 0\n        if cfg.MODEL.KEYPOINTS_ON:\n            # If we're training for keypoints, exclude images with no keypoints\n            valid = valid and entry['has_visible_keypoints']\n        return valid\n\n    num = len(roidb)\n    filtered_roidb = [entry for entry in roidb if is_valid(entry)]\n    num_after = len(filtered_roidb)\n    logger.info('Filtered {} roidb entries: {} -> {}'.\n                format(num - num_after, num, 
num_after))\n    return filtered_roidb\n\n\ndef add_bbox_regression_targets(roidb):\n    \"\"\"Add information needed to train bounding-box regressors.\"\"\"\n    for entry in roidb:\n        entry['bbox_targets'] = compute_bbox_regression_targets(entry)\n\n\ndef compute_bbox_regression_targets(entry):\n    \"\"\"Compute bounding-box regression targets for an image.\"\"\"\n    # Indices of ground-truth ROIs\n    rois = entry['boxes']\n    overlaps = entry['max_overlaps']\n    labels = entry['max_classes']\n    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]\n    # Targets has format (class, tx, ty, tw, th)\n    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)\n    if len(gt_inds) == 0:\n        # Bail if the image has no ground-truth ROIs\n        return targets\n\n    # Indices of examples for which we try to make predictions\n    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]\n\n    # Get IoU overlap between each ex ROI and gt ROI\n    ex_gt_overlaps = box_utils.bbox_overlaps(\n        rois[ex_inds, :].astype(dtype=np.float32, copy=False),\n        rois[gt_inds, :].astype(dtype=np.float32, copy=False))\n\n    # Find which gt ROI each ex ROI has max overlap with:\n    # this will be the ex ROI's gt target\n    gt_assignment = ex_gt_overlaps.argmax(axis=1)\n    gt_rois = rois[gt_inds[gt_assignment], :]\n    ex_rois = rois[ex_inds, :]\n    # Use class \"1\" for all boxes if using class_agnostic_bbox_reg\n    targets[ex_inds, 0] = (\n        1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])\n    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(\n        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)\n    return targets\n\n\ndef _compute_and_log_stats(roidb):\n    classes = roidb[0]['dataset'].classes\n    char_len = np.max([len(c) for c in classes])\n    hist_bins = np.arange(len(classes) + 1)\n\n    # Histogram of ground-truth objects\n    gt_hist = np.zeros((len(classes)), dtype=int)\n    for entry in 
roidb:\n        gt_inds = np.where(\n            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]\n        gt_classes = entry['gt_classes'][gt_inds]\n        gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]\n    logger.debug('Ground-truth class histogram:')\n    for i, v in enumerate(gt_hist):\n        logger.debug(\n            '{:d}{:s}: {:d}'.format(\n                i, classes[i].rjust(char_len), v))\n    logger.debug('-' * char_len)\n    logger.debug(\n        '{:s}: {:d}'.format(\n            'total'.rjust(char_len), np.sum(gt_hist)))\n"
  },
  {
    "path": "detectron/datasets/task_evaluation.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Evaluation interface for supported tasks (box detection, instance\nsegmentation, keypoint detection, ...).\n\n\nResults are stored in an OrderedDict with the following nested structure:\n\n<dataset>:\n  <task>:\n    <metric>: <val>\n\n<dataset> is any valid dataset (e.g., 'coco_2014_minival')\n<task> is in ['box', 'mask', 'keypoint', 'box_proposal']\n<metric> can be ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR@1000',\n                 'ARs@1000', 'ARm@1000', 'ARl@1000', ...]\n<val> is a floating point number\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom collections import OrderedDict\nimport logging\nimport os\nimport pprint\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.logging import send_email\nimport detectron.datasets.cityscapes_json_dataset_evaluator \\\n    as cs_json_dataset_evaluator\nimport detectron.datasets.json_dataset_evaluator as json_dataset_evaluator\nimport detectron.datasets.voc_dataset_evaluator as voc_dataset_evaluator\n\nlogger = logging.getLogger(__name__)\n\n\ndef evaluate_all(\n    dataset, all_boxes, all_segms, all_keyps, output_dir, use_matlab=False\n):\n    \"\"\"Evaluate \"all\" tasks, where 
\"all\" includes box detection, instance\n    segmentation, and keypoint detection.\n    \"\"\"\n    all_results = evaluate_boxes(\n        dataset, all_boxes, output_dir, use_matlab=use_matlab\n    )\n    logger.info('Evaluating bounding boxes is done!')\n    if cfg.MODEL.MASK_ON:\n        results = evaluate_masks(dataset, all_boxes, all_segms, output_dir)\n        all_results[dataset.name].update(results[dataset.name])\n        logger.info('Evaluating segmentations is done!')\n    if cfg.MODEL.KEYPOINTS_ON:\n        results = evaluate_keypoints(dataset, all_boxes, all_keyps, output_dir)\n        all_results[dataset.name].update(results[dataset.name])\n        logger.info('Evaluating keypoints is done!')\n    return all_results\n\n\ndef evaluate_boxes(dataset, all_boxes, output_dir, use_matlab=False):\n    \"\"\"Evaluate bounding box detection.\"\"\"\n    logger.info('Evaluating detections')\n    not_comp = not cfg.TEST.COMPETITION_MODE\n    if _use_json_dataset_evaluator(dataset):\n        coco_eval = json_dataset_evaluator.evaluate_boxes(\n            dataset, all_boxes, output_dir, use_salt=not_comp, cleanup=not_comp\n        )\n        box_results = _coco_eval_to_box_results(coco_eval)\n    elif _use_cityscapes_evaluator(dataset):\n        logger.warn('Cityscapes bbox evaluated using COCO metrics/conversions')\n        coco_eval = json_dataset_evaluator.evaluate_boxes(\n            dataset, all_boxes, output_dir, use_salt=not_comp, cleanup=not_comp\n        )\n        box_results = _coco_eval_to_box_results(coco_eval)\n    elif _use_voc_evaluator(dataset):\n        # For VOC, always use salt and always cleanup because results are\n        # written to the shared VOCdevkit results directory\n        voc_eval = voc_dataset_evaluator.evaluate_boxes(\n            dataset, all_boxes, output_dir, use_matlab=use_matlab\n        )\n        box_results = _voc_eval_to_box_results(voc_eval)\n    else:\n        raise NotImplementedError(\n            'No evaluator for 
dataset: {}'.format(dataset.name)\n        )\n    return OrderedDict([(dataset.name, box_results)])\n\n\ndef evaluate_masks(dataset, all_boxes, all_segms, output_dir):\n    \"\"\"Evaluate instance segmentation.\"\"\"\n    logger.info('Evaluating segmentations')\n    not_comp = not cfg.TEST.COMPETITION_MODE\n    if _use_json_dataset_evaluator(dataset):\n        coco_eval = json_dataset_evaluator.evaluate_masks(\n            dataset,\n            all_boxes,\n            all_segms,\n            output_dir,\n            use_salt=not_comp,\n            cleanup=not_comp\n        )\n        mask_results = _coco_eval_to_mask_results(coco_eval)\n    elif _use_cityscapes_evaluator(dataset):\n        cs_eval = cs_json_dataset_evaluator.evaluate_masks(\n            dataset,\n            all_boxes,\n            all_segms,\n            output_dir,\n            use_salt=not_comp,\n            cleanup=not_comp\n        )\n        mask_results = _cs_eval_to_mask_results(cs_eval)\n    else:\n        raise NotImplementedError(\n            'No evaluator for dataset: {}'.format(dataset.name)\n        )\n    return OrderedDict([(dataset.name, mask_results)])\n\n\ndef evaluate_keypoints(dataset, all_boxes, all_keyps, output_dir):\n    \"\"\"Evaluate human keypoint detection (i.e., 2D pose estimation).\"\"\"\n    logger.info('Evaluating detections')\n    not_comp = not cfg.TEST.COMPETITION_MODE\n    assert dataset.name.startswith('keypoints_coco_'), \\\n        'Only COCO keypoints are currently supported'\n    coco_eval = json_dataset_evaluator.evaluate_keypoints(\n        dataset,\n        all_boxes,\n        all_keyps,\n        output_dir,\n        use_salt=not_comp,\n        cleanup=not_comp\n    )\n    keypoint_results = _coco_eval_to_keypoint_results(coco_eval)\n    return OrderedDict([(dataset.name, keypoint_results)])\n\n\ndef evaluate_box_proposals(dataset, roidb):\n    \"\"\"Evaluate bounding box object proposals.\"\"\"\n    res = _empty_box_proposal_results()\n    areas = 
{'all': '', 'small': 's', 'medium': 'm', 'large': 'l'}\n    for limit in [100, 1000]:\n        for area, suffix in areas.items():\n            stats = json_dataset_evaluator.evaluate_box_proposals(\n                dataset,\n                roidb,\n                area=area,\n                limit=limit,\n                class_specific=cfg.TEST.CLASS_SPECIFIC_AR\n            )\n            key = 'AR{}@{:d}'.format(suffix, limit)\n            res['box_proposal'][key] = stats['ar']\n    return OrderedDict([(dataset.name, res)])\n\n\ndef log_box_proposal_results(results):\n    \"\"\"Log bounding box proposal results.\"\"\"\n    for dataset in results.keys():\n        keys = results[dataset]['box_proposal'].keys()\n        pad = max([len(k) for k in keys])\n        logger.info(dataset)\n        for k, v in results[dataset]['box_proposal'].items():\n            logger.info('{}: {:.3f}'.format(k.ljust(pad), v))\n\n\ndef log_copy_paste_friendly_results(results):\n    \"\"\"Log results in a format that makes it easy to copy-and-paste in a\n    spreadsheet. Lines are prefixed with 'copypaste: ' to make grepping easy.\n    \"\"\"\n    for dataset in results.keys():\n        logger.info('copypaste: Dataset: {}'.format(dataset))\n        for task, metrics in results[dataset].items():\n            logger.info('copypaste: Task: {}'.format(task))\n            metric_names = metrics.keys()\n            metric_vals = ['{:.4f}'.format(v) for v in metrics.values()]\n            logger.info('copypaste: ' + ','.join(metric_names))\n            logger.info('copypaste: ' + ','.join(metric_vals))\n\n\ndef check_expected_results(results, atol=0.005, rtol=0.1):\n    \"\"\"Check actual results against expected results stored in\n    cfg.EXPECTED_RESULTS. Optionally email if the mismatch exceeds the\n    specified tolerance.\n\n    Expected results should take the form of a list of expectations, each\n    specified by four elements: [dataset, task, metric, expected value]. 
For\n    example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387], ...].\n\n    The expected value may also be formatted as a list [mean, std] providing\n    an empirical mean and standard deviation from which a valid range is computed\n    using cfg.EXPECTED_RESULTS_SIGMA_TOL. For example:\n    [['coco_2014_minival', 'box_proposal', 'AR@1000', [0.387, 0.001]], ...]\n    \"\"\"\n    # cfg contains a reference set of results that we want to check against\n    if len(cfg.EXPECTED_RESULTS) == 0:\n        return\n\n    for dataset, task, metric, expected_val in cfg.EXPECTED_RESULTS:\n        assert dataset in results, 'Dataset {} not in results'.format(dataset)\n        assert task in results[dataset], 'Task {} not in results'.format(task)\n        assert metric in results[dataset][task], \\\n            'Metric {} not in results'.format(metric)\n        actual_val = results[dataset][task][metric]\n        ok = False\n        if isinstance(expected_val, list):\n            assert len(expected_val) == 2, (\n                'Expected result must be in (mean, std) format'\n            )\n            mean, std = expected_val\n            lo = mean - cfg.EXPECTED_RESULTS_SIGMA_TOL * std\n            hi = mean + cfg.EXPECTED_RESULTS_SIGMA_TOL * std\n            ok = (lo < actual_val) and (actual_val < hi)\n            msg = (\n                '{} > {} > {} sanity check (actual vs. expected): '\n                '{:.3f} vs. mean={:.4f}, std={:.4}, range=({:.4f}, {:.4f})'\n            ).format(dataset, task, metric, actual_val, mean, std, lo, hi)\n        else:\n            err = abs(actual_val - expected_val)\n            tol = atol + rtol * abs(expected_val)\n            ok = (err <= tol)\n            msg = (\n                '{} > {} > {} sanity check (actual vs. expected): '\n                '{:.3f} vs. 
{:.3f}, err={:.3f}, tol={:.3f}'\n            ).format(dataset, task, metric, actual_val, expected_val, err, tol)\n        if not ok:\n            msg = 'FAIL: ' + msg\n            logger.error(msg)\n            if cfg.EXPECTED_RESULTS_EMAIL != '':\n                subject = 'Detectron end-to-end test failure'\n                job_name = os.environ[\n                    'DETECTRON_JOB_NAME'\n                ] if 'DETECTRON_JOB_NAME' in os.environ else '<unknown>'\n                job_id = os.environ[\n                    'WORKFLOW_RUN_ID'\n                ] if 'WORKFLOW_RUN_ID' in os.environ else '<unknown>'\n                body = [\n                    'Name:',\n                    job_name,\n                    'Run ID:',\n                    job_id,\n                    'Failure:',\n                    msg,\n                    'Config:',\n                    pprint.pformat(cfg),\n                    'Env:',\n                    pprint.pformat(dict(os.environ)),\n                ]\n                send_email(\n                    subject, '\\n\\n'.join(body), cfg.EXPECTED_RESULTS_EMAIL\n                )\n        else:\n            msg = 'PASS: ' + msg\n            logger.info(msg)\n\n\ndef _use_json_dataset_evaluator(dataset):\n    \"\"\"Check if the dataset uses the general json dataset evaluator.\"\"\"\n    return dataset.name.find('coco_') > -1 or cfg.TEST.FORCE_JSON_DATASET_EVAL\n\n\ndef _use_cityscapes_evaluator(dataset):\n    \"\"\"Check if the dataset uses the Cityscapes dataset evaluator.\"\"\"\n    return dataset.name.find('cityscapes_') > -1\n\n\ndef _use_voc_evaluator(dataset):\n    \"\"\"Check if the dataset uses the PASCAL VOC dataset evaluator.\"\"\"\n    return dataset.name[:4] == 'voc_'\n\n\n# Indices in the stats array for COCO boxes and masks\nCOCO_AP = 0\nCOCO_AP50 = 1\nCOCO_AP75 = 2\nCOCO_APS = 3\nCOCO_APM = 4\nCOCO_APL = 5\n# Slight difference for keypoints\nCOCO_KPS_APM = 3\nCOCO_KPS_APL = 4\n\n\n# 
---------------------------------------------------------------------------- #\n# Helper functions for producing properly formatted results.\n# ---------------------------------------------------------------------------- #\n\ndef _coco_eval_to_box_results(coco_eval):\n    res = _empty_box_results()\n    if coco_eval is not None:\n        s = coco_eval.stats\n        res['box']['AP'] = s[COCO_AP]\n        res['box']['AP50'] = s[COCO_AP50]\n        res['box']['AP75'] = s[COCO_AP75]\n        res['box']['APs'] = s[COCO_APS]\n        res['box']['APm'] = s[COCO_APM]\n        res['box']['APl'] = s[COCO_APL]\n    return res\n\n\ndef _coco_eval_to_mask_results(coco_eval):\n    res = _empty_mask_results()\n    if coco_eval is not None:\n        s = coco_eval.stats\n        res['mask']['AP'] = s[COCO_AP]\n        res['mask']['AP50'] = s[COCO_AP50]\n        res['mask']['AP75'] = s[COCO_AP75]\n        res['mask']['APs'] = s[COCO_APS]\n        res['mask']['APm'] = s[COCO_APM]\n        res['mask']['APl'] = s[COCO_APL]\n    return res\n\n\ndef _coco_eval_to_keypoint_results(coco_eval):\n    res = _empty_keypoint_results()\n    if coco_eval is not None:\n        s = coco_eval.stats\n        res['keypoint']['AP'] = s[COCO_AP]\n        res['keypoint']['AP50'] = s[COCO_AP50]\n        res['keypoint']['AP75'] = s[COCO_AP75]\n        res['keypoint']['APm'] = s[COCO_KPS_APM]\n        res['keypoint']['APl'] = s[COCO_KPS_APL]\n    return res\n\n\ndef _voc_eval_to_box_results(voc_eval):\n    # Not supported (return empty results)\n    return _empty_box_results()\n\n\ndef _cs_eval_to_mask_results(cs_eval):\n    # Not supported (return empty results)\n    return _empty_mask_results()\n\n\ndef _empty_box_results():\n    return OrderedDict({\n        'box':\n        OrderedDict(\n            [\n                ('AP', -1),\n                ('AP50', -1),\n                ('AP75', -1),\n                ('APs', -1),\n                ('APm', -1),\n                ('APl', -1),\n            ]\n        
)\n    })\n\n\ndef _empty_mask_results():\n    return OrderedDict({\n        'mask':\n        OrderedDict(\n            [\n                ('AP', -1),\n                ('AP50', -1),\n                ('AP75', -1),\n                ('APs', -1),\n                ('APm', -1),\n                ('APl', -1),\n            ]\n        )\n    })\n\n\ndef _empty_keypoint_results():\n    return OrderedDict({\n        'keypoint':\n        OrderedDict(\n            [\n                ('AP', -1),\n                ('AP50', -1),\n                ('AP75', -1),\n                ('APm', -1),\n                ('APl', -1),\n            ]\n        )\n    })\n\n\ndef _empty_box_proposal_results():\n    return OrderedDict({\n        'box_proposal':\n        OrderedDict(\n            [\n                ('AR@100', -1),\n                ('ARs@100', -1),\n                ('ARm@100', -1),\n                ('ARl@100', -1),\n                ('AR@1000', -1),\n                ('ARs@1000', -1),\n                ('ARm@1000', -1),\n                ('ARl@1000', -1),\n            ]\n        )\n    })\n"
  },
  {
    "path": "detectron/datasets/voc_dataset_evaluator.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"PASCAL VOC dataset evaluation interface.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport logging\nimport numpy as np\nimport os\nimport shutil\nimport uuid\n\nfrom detectron.core.config import cfg\nfrom detectron.datasets.dataset_catalog import get_devkit_dir\nfrom detectron.datasets.voc_eval import voc_eval\nfrom detectron.utils.io import save_object\n\nlogger = logging.getLogger(__name__)\n\n\ndef evaluate_boxes(\n    json_dataset,\n    all_boxes,\n    output_dir,\n    use_salt=True,\n    cleanup=True,\n    use_matlab=False\n):\n    salt = '_{}'.format(str(uuid.uuid4())) if use_salt else ''\n    filenames = _write_voc_results_files(json_dataset, all_boxes, salt)\n    _do_python_eval(json_dataset, salt, output_dir)\n    if use_matlab:\n        _do_matlab_eval(json_dataset, salt, output_dir)\n    if cleanup:\n        for filename in filenames:\n            shutil.copy(filename, output_dir)\n            os.remove(filename)\n    return None\n\n\ndef _write_voc_results_files(json_dataset, all_boxes, salt):\n    filenames = []\n    image_set_path = voc_info(json_dataset)['image_set_path']\n    assert os.path.exists(image_set_path), \\\n        'Image 
set path does not exist: {}'.format(image_set_path)\n    with open(image_set_path, 'r') as f:\n        image_index = [x.strip() for x in f.readlines()]\n    # Sanity check that order of images in json dataset matches order in the\n    # image set\n    roidb = json_dataset.get_roidb()\n    for i, entry in enumerate(roidb):\n        index = os.path.splitext(os.path.split(entry['image'])[1])[0]\n        assert index == image_index[i]\n    for cls_ind, cls in enumerate(json_dataset.classes):\n        if cls == '__background__':\n            continue\n        logger.info('Writing VOC results for: {}'.format(cls))\n        filename = _get_voc_results_file_template(json_dataset,\n                                                  salt).format(cls)\n        filenames.append(filename)\n        assert len(all_boxes[cls_ind]) == len(image_index)\n        with open(filename, 'wt') as f:\n            for im_ind, index in enumerate(image_index):\n                dets = all_boxes[cls_ind][im_ind]\n                if type(dets) == list:\n                    assert len(dets) == 0, \\\n                        'dets should be numpy.ndarray or empty list'\n                    continue\n                # the VOCdevkit expects 1-based indices\n                for k in range(dets.shape[0]):\n                    f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\\n'.\n                            format(index, dets[k, -1],\n                                   dets[k, 0] + 1, dets[k, 1] + 1,\n                                   dets[k, 2] + 1, dets[k, 3] + 1))\n    return filenames\n\n\ndef _get_voc_results_file_template(json_dataset, salt):\n    info = voc_info(json_dataset)\n    year = info['year']\n    image_set = info['image_set']\n    devkit_path = info['devkit_path']\n    # VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt\n    filename = 'comp4' + salt + '_det_' + image_set + '_{:s}.txt'\n    return os.path.join(devkit_path, 'results', 'VOC' + year, 'Main', 
filename)\n\n\ndef _do_python_eval(json_dataset, salt, output_dir='output'):\n    info = voc_info(json_dataset)\n    year = info['year']\n    anno_path = info['anno_path']\n    image_set_path = info['image_set_path']\n    devkit_path = info['devkit_path']\n    cachedir = os.path.join(devkit_path, 'annotations_cache')\n    aps = []\n    # The PASCAL VOC metric changed in 2010\n    use_07_metric = True if int(year) < 2010 else False\n    logger.info('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))\n    if not os.path.isdir(output_dir):\n        os.mkdir(output_dir)\n    for _, cls in enumerate(json_dataset.classes):\n        if cls == '__background__':\n            continue\n        filename = _get_voc_results_file_template(\n            json_dataset, salt).format(cls)\n        rec, prec, ap = voc_eval(\n            filename, anno_path, image_set_path, cls, cachedir, ovthresh=0.5,\n            use_07_metric=use_07_metric)\n        aps += [ap]\n        logger.info('AP for {} = {:.4f}'.format(cls, ap))\n        res_file = os.path.join(output_dir, cls + '_pr.pkl')\n        save_object({'rec': rec, 'prec': prec, 'ap': ap}, res_file)\n    logger.info('Mean AP = {:.4f}'.format(np.mean(aps)))\n    logger.info('~~~~~~~~')\n    logger.info('Results:')\n    for ap in aps:\n        logger.info('{:.3f}'.format(ap))\n    logger.info('{:.3f}'.format(np.mean(aps)))\n    logger.info('~~~~~~~~')\n    logger.info('')\n    logger.info('----------------------------------------------------------')\n    logger.info('Results computed with the **unofficial** Python eval code.')\n    logger.info('Results should be very close to the official MATLAB code.')\n    logger.info('Use `./tools/reval.py --matlab ...` for your paper.')\n    logger.info('-- Thanks, The Management')\n    logger.info('----------------------------------------------------------')\n\n\ndef _do_matlab_eval(json_dataset, salt, output_dir='output'):\n    import subprocess\n    
logger.info('-----------------------------------------------------')\n    logger.info('Computing results with the official MATLAB eval code.')\n    logger.info('-----------------------------------------------------')\n    info = voc_info(json_dataset)\n    path = os.path.join(\n        cfg.ROOT_DIR, 'detectron', 'datasets', 'VOCdevkit-matlab-wrapper')\n    cmd = 'cd {} && '.format(path)\n    cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB)\n    cmd += '-r \"dbstop if error; '\n    cmd += 'voc_eval(\\'{:s}\\',\\'{:s}\\',\\'{:s}\\',\\'{:s}\\'); quit;\"' \\\n       .format(info['devkit_path'], 'comp4' + salt, info['image_set'],\n               output_dir)\n    logger.info('Running:\\n{}'.format(cmd))\n    subprocess.call(cmd, shell=True)\n\n\ndef voc_info(json_dataset):\n    year = json_dataset.name[4:8]\n    image_set = json_dataset.name[9:]\n    devkit_path = get_devkit_dir(json_dataset.name)\n    assert os.path.exists(devkit_path), \\\n        'Devkit directory {} not found'.format(devkit_path)\n    anno_path = os.path.join(\n        devkit_path, 'VOC' + year, 'Annotations', '{:s}.xml')\n    image_set_path = os.path.join(\n        devkit_path, 'VOC' + year, 'ImageSets', 'Main', image_set + '.txt')\n    return dict(\n        year=year,\n        image_set=image_set,\n        devkit_path=devkit_path,\n        anno_path=anno_path,\n        image_set_path=image_set_path)\n"
  },
  {
    "path": "detectron/datasets/voc_eval.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Bharath Hariharan\n# --------------------------------------------------------\n\n\"\"\"Python implementation of the PASCAL VOC devkit's AP evaluation code.\"\"\"\n\nimport logging\nimport numpy as np\nimport os\nimport xml.etree.ElementTree as ET\n\nfrom detectron.utils.io import load_object\nfrom detectron.utils.io import save_object\n\nlogger = logging.getLogger(__name__)\n\n\ndef parse_rec(filename):\n    \"\"\"Parse a PASCAL VOC xml file.\"\"\"\n    tree = ET.parse(filename)\n    objects = []\n    for obj in tree.findall('object'):\n        obj_struct = {}\n        obj_struct['name'] = obj.find('name').text\n        obj_struct['pose'] = obj.find('pose').text\n        obj_struct['truncated'] = int(obj.find('truncated').text)\n        obj_struct['difficult'] = int(obj.find('difficult').text)\n        bbox = obj.find('bndbox')\n        obj_struct['bbox'] = [int(bbox.find('xmin').text),\n                              int(bbox.find('ymin').text),\n                              int(bbox.find('xmax').text),\n                              int(bbox.find('ymax').text)]\n        objects.append(obj_struct)\n\n    return 
objects\n\n\ndef voc_ap(rec, prec, use_07_metric=False):\n    \"\"\"Compute VOC AP given precision and recall. If use_07_metric is true, uses\n    the VOC 07 11-point method (default:False).\n    \"\"\"\n    if use_07_metric:\n        # 11 point metric\n        ap = 0.\n        for t in np.arange(0., 1.1, 0.1):\n            if np.sum(rec >= t) == 0:\n                p = 0\n            else:\n                p = np.max(prec[rec >= t])\n            ap = ap + p / 11.\n    else:\n        # correct AP calculation\n        # first append sentinel values at the end\n        mrec = np.concatenate(([0.], rec, [1.]))\n        mpre = np.concatenate(([0.], prec, [0.]))\n\n        # compute the precision envelope\n        for i in range(mpre.size - 1, 0, -1):\n            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])\n\n        # to calculate area under PR curve, look for points\n        # where X axis (recall) changes value\n        i = np.where(mrec[1:] != mrec[:-1])[0]\n\n        # and sum (\\Delta recall) * prec\n        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])\n    return ap\n\n\ndef voc_eval(detpath,\n             annopath,\n             imagesetfile,\n             classname,\n             cachedir,\n             ovthresh=0.5,\n             use_07_metric=False):\n    \"\"\"rec, prec, ap = voc_eval(detpath,\n                                annopath,\n                                imagesetfile,\n                                classname,\n                                [ovthresh],\n                                [use_07_metric])\n\n    Top level function that does the PASCAL VOC evaluation.\n\n    detpath: Path to detections\n        detpath.format(classname) should produce the detection results file.\n    annopath: Path to annotations\n        annopath.format(imagename) should be the xml annotations file.\n    imagesetfile: Text file containing the list of images, one image per line.\n    classname: Category name (duh)\n    cachedir: Directory for caching 
the annotations\n    [ovthresh]: Overlap threshold (default = 0.5)\n    [use_07_metric]: Whether to use VOC07's 11 point AP computation\n        (default False)\n    \"\"\"\n    # assumes detections are in detpath.format(classname)\n    # assumes annotations are in annopath.format(imagename)\n    # assumes imagesetfile is a text file with each line an image name\n    # cachedir caches the annotations in a pickle file\n\n    # first load gt\n    if not os.path.isdir(cachedir):\n        os.mkdir(cachedir)\n    imageset = os.path.splitext(os.path.basename(imagesetfile))[0]\n    cachefile = os.path.join(cachedir, imageset + '_annots.pkl')\n    # read list of images\n    with open(imagesetfile, 'r') as f:\n        lines = f.readlines()\n    imagenames = [x.strip() for x in lines]\n\n    if not os.path.isfile(cachefile):\n        # load annots\n        recs = {}\n        for i, imagename in enumerate(imagenames):\n            recs[imagename] = parse_rec(annopath.format(imagename))\n            if i % 100 == 0:\n                logger.info(\n                    'Reading annotation for {:d}/{:d}'.format(\n                        i + 1, len(imagenames)))\n        # save\n        logger.info('Saving cached annotations to {:s}'.format(cachefile))\n        save_object(recs, cachefile)\n    else:\n        recs = load_object(cachefile)\n\n    # extract gt objects for this class\n    class_recs = {}\n    npos = 0\n    for imagename in imagenames:\n        R = [obj for obj in recs[imagename] if obj['name'] == classname]\n        bbox = np.array([x['bbox'] for x in R])\n        difficult = np.array([x['difficult'] for x in R]).astype(bool)\n        det = [False] * len(R)\n        npos = npos + sum(~difficult)\n        class_recs[imagename] = {'bbox': bbox,\n                                 'difficult': difficult,\n                                 'det': det}\n\n    # read dets\n    detfile = detpath.format(classname)\n    with open(detfile, 'r') as f:\n        lines = 
f.readlines()\n\n    splitlines = [x.strip().split(' ') for x in lines]\n    image_ids = [x[0] for x in splitlines]\n    confidence = np.array([float(x[1]) for x in splitlines])\n    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])\n\n    # sort by confidence\n    sorted_ind = np.argsort(-confidence)\n    BB = BB[sorted_ind, :]\n    image_ids = [image_ids[x] for x in sorted_ind]\n\n    # go down dets and mark TPs and FPs\n    nd = len(image_ids)\n    tp = np.zeros(nd)\n    fp = np.zeros(nd)\n    for d in range(nd):\n        R = class_recs[image_ids[d]]\n        bb = BB[d, :].astype(float)\n        ovmax = -np.inf\n        BBGT = R['bbox'].astype(float)\n\n        if BBGT.size > 0:\n            # compute overlaps\n            # intersection\n            ixmin = np.maximum(BBGT[:, 0], bb[0])\n            iymin = np.maximum(BBGT[:, 1], bb[1])\n            ixmax = np.minimum(BBGT[:, 2], bb[2])\n            iymax = np.minimum(BBGT[:, 3], bb[3])\n            iw = np.maximum(ixmax - ixmin + 1., 0.)\n            ih = np.maximum(iymax - iymin + 1., 0.)\n            inters = iw * ih\n\n            # union\n            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +\n                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *\n                   (BBGT[:, 3] - BBGT[:, 1] + 1.) 
- inters)\n\n            overlaps = inters / uni\n            ovmax = np.max(overlaps)\n            jmax = np.argmax(overlaps)\n\n        if ovmax > ovthresh:\n            if not R['difficult'][jmax]:\n                if not R['det'][jmax]:\n                    tp[d] = 1.\n                    R['det'][jmax] = 1\n                else:\n                    fp[d] = 1.\n        else:\n            fp[d] = 1.\n\n    # compute precision recall\n    fp = np.cumsum(fp)\n    tp = np.cumsum(tp)\n    rec = tp / float(npos)\n    # avoid divide by zero in case the first detection matches a difficult\n    # ground truth\n    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)\n    ap = voc_ap(rec, prec, use_07_metric)\n\n    return rec, prec, ap\n"
  },
  {
    "path": "detectron/modeling/FPN.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Functions for using a Feature Pyramid Network (FPN).\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport collections\nimport numpy as np\n\nfrom detectron.core.config import cfg\nfrom detectron.modeling.generate_anchors import generate_anchors\nfrom detectron.utils.c2 import const_fill\nfrom detectron.utils.c2 import gauss_fill\nfrom detectron.utils.net import get_group_gn\nimport detectron.modeling.ResNet as ResNet\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.boxes as box_utils\n\n# Lowest and highest pyramid levels in the backbone network. For FPN, we assume\n# that all networks have 5 spatial reductions, each by a factor of 2. 
Level 1\n# would correspond to the input image, hence it does not make sense to use it.\nLOWEST_BACKBONE_LVL = 2   # E.g., \"conv2\"-like level\nHIGHEST_BACKBONE_LVL = 5  # E.g., \"conv5\"-like level\n\n\n# ---------------------------------------------------------------------------- #\n# FPN with ResNet\n# ---------------------------------------------------------------------------- #\n\ndef add_fpn_ResNet50_conv5_body(model):\n    return add_fpn_onto_conv_body(\n        model, ResNet.add_ResNet50_conv5_body, fpn_level_info_ResNet50_conv5\n    )\n\n\ndef add_fpn_ResNet50_conv5_P2only_body(model):\n    return add_fpn_onto_conv_body(\n        model,\n        ResNet.add_ResNet50_conv5_body,\n        fpn_level_info_ResNet50_conv5,\n        P2only=True\n    )\n\n\ndef add_fpn_ResNet101_conv5_body(model):\n    return add_fpn_onto_conv_body(\n        model, ResNet.add_ResNet101_conv5_body, fpn_level_info_ResNet101_conv5\n    )\n\n\ndef add_fpn_ResNet101_conv5_P2only_body(model):\n    return add_fpn_onto_conv_body(\n        model,\n        ResNet.add_ResNet101_conv5_body,\n        fpn_level_info_ResNet101_conv5,\n        P2only=True\n    )\n\n\ndef add_fpn_ResNet152_conv5_body(model):\n    return add_fpn_onto_conv_body(\n        model, ResNet.add_ResNet152_conv5_body, fpn_level_info_ResNet152_conv5\n    )\n\n\ndef add_fpn_ResNet152_conv5_P2only_body(model):\n    return add_fpn_onto_conv_body(\n        model,\n        ResNet.add_ResNet152_conv5_body,\n        fpn_level_info_ResNet152_conv5,\n        P2only=True\n    )\n\n\n# ---------------------------------------------------------------------------- #\n# Functions for bolting FPN onto backbone architectures\n# ---------------------------------------------------------------------------- #\n\ndef add_fpn_onto_conv_body(\n    model, conv_body_func, fpn_level_info_func, P2only=False\n):\n    \"\"\"Add the specified conv body to the model and then add FPN levels to it.\n    \"\"\"\n    # Note: blobs_conv is in reversed order: 
[fpn5, fpn4, fpn3, fpn2]\n    # similarly for dims_conv: [2048, 1024, 512, 256]\n    # similarly for spatial_scales_fpn: [1/32, 1/16, 1/8, 1/4]\n\n    conv_body_func(model)\n    blobs_fpn, dim_fpn, spatial_scales_fpn = add_fpn(\n        model, fpn_level_info_func()\n    )\n\n    if P2only:\n        # use only the finest level\n        return blobs_fpn[-1], dim_fpn, spatial_scales_fpn[-1]\n    else:\n        # use all levels\n        return blobs_fpn, dim_fpn, spatial_scales_fpn\n\n\ndef add_fpn(model, fpn_level_info):\n    \"\"\"Add FPN connections based on the model described in the FPN paper.\"\"\"\n    # FPN levels are built starting from the highest/coarsest level of the\n    # backbone (usually \"conv5\"). First we build down, recursively constructing\n    # lower/finer resolution FPN levels. Then we build up, constructing levels\n    # that are even higher/coarser than the starting level.\n    fpn_dim = cfg.FPN.DIM\n    min_level, max_level = get_min_max_levels()\n    # Count the number of backbone stages that we will generate FPN levels for\n    # starting from the coarsest backbone stage (usually the \"conv5\"-like level)\n    # E.g., if the backbone level info defines 4 stages: \"conv5\",\n    # \"conv4\", ... 
\"conv2\" and min_level=2, then we end up with 4 - (2 - 2) = 4\n    # backbone stages to add FPN to.\n    num_backbone_stages = (\n        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)\n    )\n\n    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]\n    output_blobs = [\n        'fpn_inner_{}'.format(s)\n        for s in fpn_level_info.blobs[:num_backbone_stages]\n    ]\n    fpn_dim_lateral = fpn_level_info.dims\n    xavier_fill = ('XavierFill', {})\n\n    # For the coarsest backbone level: 1x1 conv only seeds recursion\n    if cfg.FPN.USE_GN:\n        # use GroupNorm\n        c = model.ConvGN(\n            lateral_input_blobs[0],\n            output_blobs[0],  # note: this is a prefix\n            dim_in=fpn_dim_lateral[0],\n            dim_out=fpn_dim,\n            group_gn=get_group_gn(fpn_dim),\n            kernel=1,\n            pad=0,\n            stride=1,\n            weight_init=xavier_fill,\n            bias_init=const_fill(0.0)\n        )\n        output_blobs[0] = c  # rename it\n    else:\n        model.Conv(\n            lateral_input_blobs[0],\n            output_blobs[0],\n            dim_in=fpn_dim_lateral[0],\n            dim_out=fpn_dim,\n            kernel=1,\n            pad=0,\n            stride=1,\n            weight_init=xavier_fill,\n            bias_init=const_fill(0.0)\n        )\n\n    #\n    # Step 1: recursively build down starting from the coarsest backbone level\n    #\n\n    # For other levels add top-down and lateral connections\n    for i in range(num_backbone_stages - 1):\n        add_topdown_lateral_module(\n            model,\n            output_blobs[i],             # top-down blob\n            lateral_input_blobs[i + 1],  # lateral blob\n            output_blobs[i + 1],         # next output blob\n            fpn_dim,                     # output dimension\n            fpn_dim_lateral[i + 1]       # lateral input dimension\n        )\n\n    # Post-hoc scale-specific 3x3 convs\n    blobs_fpn = 
[]\n    spatial_scales = []\n    for i in range(num_backbone_stages):\n        if cfg.FPN.USE_GN:\n            # use GroupNorm\n            fpn_blob = model.ConvGN(\n                output_blobs[i],\n                'fpn_{}'.format(fpn_level_info.blobs[i]),\n                dim_in=fpn_dim,\n                dim_out=fpn_dim,\n                group_gn=get_group_gn(fpn_dim),\n                kernel=3,\n                pad=1,\n                stride=1,\n                weight_init=xavier_fill,\n                bias_init=const_fill(0.0)\n            )\n        else:\n            fpn_blob = model.Conv(\n                output_blobs[i],\n                'fpn_{}'.format(fpn_level_info.blobs[i]),\n                dim_in=fpn_dim,\n                dim_out=fpn_dim,\n                kernel=3,\n                pad=1,\n                stride=1,\n                weight_init=xavier_fill,\n                bias_init=const_fill(0.0)\n            )\n        blobs_fpn += [fpn_blob]\n        spatial_scales += [fpn_level_info.spatial_scales[i]]\n\n    #\n    # Step 2: build up starting from the coarsest backbone level\n    #\n\n    # Check if we need the P6 feature map\n    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:\n        # Original FPN P6 level implementation from our CVPR'17 FPN paper\n        P6_blob_in = blobs_fpn[0]\n        P6_name = P6_blob_in + '_subsampled_2x'\n        # Use max pooling to simulate stride 2 subsampling\n        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)\n        blobs_fpn.insert(0, P6_blob)\n        spatial_scales.insert(0, spatial_scales[0] * 0.5)\n\n    # Coarser FPN levels introduced for RetinaNet\n    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:\n        fpn_blob = fpn_level_info.blobs[0]\n        dim_in = fpn_level_info.dims[0]\n        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):\n            fpn_blob_in = fpn_blob\n            if i > HIGHEST_BACKBONE_LVL + 
1:\n                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')\n            fpn_blob = model.Conv(\n                fpn_blob_in,\n                'fpn_' + str(i),\n                dim_in=dim_in,\n                dim_out=fpn_dim,\n                kernel=3,\n                pad=1,\n                stride=2,\n                weight_init=xavier_fill,\n                bias_init=const_fill(0.0)\n            )\n            dim_in = fpn_dim\n            blobs_fpn.insert(0, fpn_blob)\n            spatial_scales.insert(0, spatial_scales[0] * 0.5)\n\n    return blobs_fpn, fpn_dim, spatial_scales\n\n\ndef add_topdown_lateral_module(\n    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral\n):\n    \"\"\"Add a top-down lateral module.\"\"\"\n    # Lateral 1x1 conv\n    if cfg.FPN.USE_GN:\n        # use GroupNorm\n        lat = model.ConvGN(\n            fpn_lateral,\n            fpn_bottom + '_lateral',\n            dim_in=dim_lateral,\n            dim_out=dim_top,\n            group_gn=get_group_gn(dim_top),\n            kernel=1,\n            pad=0,\n            stride=1,\n            weight_init=(\n                const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL\n                else ('XavierFill', {})),\n            bias_init=const_fill(0.0)\n        )\n    else:\n        lat = model.Conv(\n            fpn_lateral,\n            fpn_bottom + '_lateral',\n            dim_in=dim_lateral,\n            dim_out=dim_top,\n            kernel=1,\n            pad=0,\n            stride=1,\n            weight_init=(\n                const_fill(0.0)\n                if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})\n            ),\n            bias_init=const_fill(0.0)\n        )\n    # Top-down 2x upsampling\n    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)\n    # Sum lateral and top-down\n    model.net.Sum([lat, td], fpn_bottom)\n\n\ndef get_min_max_levels():\n    \"\"\"The min and max FPN levels required for supporting RPN and/or 
RoI\n    transform operations on multiple FPN levels.\n    \"\"\"\n    min_level = LOWEST_BACKBONE_LVL\n    max_level = HIGHEST_BACKBONE_LVL\n    if cfg.FPN.MULTILEVEL_RPN and not cfg.FPN.MULTILEVEL_ROIS:\n        max_level = cfg.FPN.RPN_MAX_LEVEL\n        min_level = cfg.FPN.RPN_MIN_LEVEL\n    if not cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:\n        max_level = cfg.FPN.ROI_MAX_LEVEL\n        min_level = cfg.FPN.ROI_MIN_LEVEL\n    if cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:\n        max_level = max(cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.ROI_MAX_LEVEL)\n        min_level = min(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.ROI_MIN_LEVEL)\n    return min_level, max_level\n\n\n# ---------------------------------------------------------------------------- #\n# RPN with an FPN backbone\n# ---------------------------------------------------------------------------- #\n\ndef add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):\n    \"\"\"Add RPN on FPN specific outputs.\"\"\"\n    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)\n    dim_out = dim_in\n\n    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid\n    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid\n    assert len(blobs_in) == k_max - k_min + 1\n    for lvl in range(k_min, k_max + 1):\n        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order\n        sc = spatial_scales[k_max - lvl]  # in reversed order\n        slvl = str(lvl)\n\n        if lvl == k_min:\n            # Create conv ops with randomly initialized weights and\n            # zeroed biases for the first FPN level; these will be shared by\n            # all other FPN levels\n            # RPN hidden representation\n            conv_rpn_fpn = model.Conv(\n                bl_in,\n                'conv_rpn_fpn' + slvl,\n                dim_in,\n                dim_out,\n                kernel=3,\n                pad=1,\n                stride=1,\n                weight_init=gauss_fill(0.01),\n                
bias_init=const_fill(0.0)\n            )\n            model.Relu(conv_rpn_fpn, conv_rpn_fpn)\n            # Proposal classification scores\n            rpn_cls_logits_fpn = model.Conv(\n                conv_rpn_fpn,\n                'rpn_cls_logits_fpn' + slvl,\n                dim_in,\n                num_anchors,\n                kernel=1,\n                pad=0,\n                stride=1,\n                weight_init=gauss_fill(0.01),\n                bias_init=const_fill(0.0)\n            )\n            # Proposal bbox regression deltas\n            rpn_bbox_pred_fpn = model.Conv(\n                conv_rpn_fpn,\n                'rpn_bbox_pred_fpn' + slvl,\n                dim_in,\n                4 * num_anchors,\n                kernel=1,\n                pad=0,\n                stride=1,\n                weight_init=gauss_fill(0.01),\n                bias_init=const_fill(0.0)\n            )\n        else:\n            # Share weights and biases\n            sk_min = str(k_min)\n            # RPN hidden representation\n            conv_rpn_fpn = model.ConvShared(\n                bl_in,\n                'conv_rpn_fpn' + slvl,\n                dim_in,\n                dim_out,\n                kernel=3,\n                pad=1,\n                stride=1,\n                weight='conv_rpn_fpn' + sk_min + '_w',\n                bias='conv_rpn_fpn' + sk_min + '_b'\n            )\n            model.Relu(conv_rpn_fpn, conv_rpn_fpn)\n            # Proposal classification scores\n            rpn_cls_logits_fpn = model.ConvShared(\n                conv_rpn_fpn,\n                'rpn_cls_logits_fpn' + slvl,\n                dim_in,\n                num_anchors,\n                kernel=1,\n                pad=0,\n                stride=1,\n                weight='rpn_cls_logits_fpn' + sk_min + '_w',\n                bias='rpn_cls_logits_fpn' + sk_min + '_b'\n            )\n            # Proposal bbox regression deltas\n            rpn_bbox_pred_fpn = model.ConvShared(\n   
             conv_rpn_fpn,\n                'rpn_bbox_pred_fpn' + slvl,\n                dim_in,\n                4 * num_anchors,\n                kernel=1,\n                pad=0,\n                stride=1,\n                weight='rpn_bbox_pred_fpn' + sk_min + '_w',\n                bias='rpn_bbox_pred_fpn' + sk_min + '_b'\n            )\n\n        if not model.train or cfg.MODEL.FASTER_RCNN:\n            # Proposals are needed during:\n            #  1) inference (== not model.train) for RPN only and Faster R-CNN\n            #  OR\n            #  2) training for Faster R-CNN\n            # Otherwise (== training for RPN only), proposals are not needed\n            lvl_anchors = generate_anchors(\n                stride=2.**lvl,\n                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),\n                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS\n            )\n            rpn_cls_probs_fpn = model.net.Sigmoid(\n                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl\n            )\n            model.GenerateProposals(\n                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],\n                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],\n                anchors=lvl_anchors,\n                spatial_scale=sc\n            )\n\n\ndef add_fpn_rpn_losses(model):\n    \"\"\"Add RPN on FPN specific losses.\"\"\"\n    loss_gradients = {}\n    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):\n        slvl = str(lvl)\n        # Spatially narrow the full-sized RPN label arrays to match the feature map\n        # shape\n        model.net.SpatialNarrowAs(\n            ['rpn_labels_int32_wide_fpn' + slvl, 'rpn_cls_logits_fpn' + slvl],\n            'rpn_labels_int32_fpn' + slvl\n        )\n        for key in ('targets', 'inside_weights', 'outside_weights'):\n            model.net.SpatialNarrowAs(\n                [\n                    'rpn_bbox_' + key + '_wide_fpn' + slvl,\n                    'rpn_bbox_pred_fpn' + 
slvl\n                ],\n                'rpn_bbox_' + key + '_fpn' + slvl\n            )\n        loss_rpn_cls_fpn = model.net.SigmoidCrossEntropyLoss(\n            ['rpn_cls_logits_fpn' + slvl, 'rpn_labels_int32_fpn' + slvl],\n            'loss_rpn_cls_fpn' + slvl,\n            normalize=0,\n            scale=(\n                model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /\n                cfg.TRAIN.IMS_PER_BATCH\n            )\n        )\n        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is\n        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss\n        # normalizes by IMS_PER_BATCH\n        loss_rpn_bbox_fpn = model.net.SmoothL1Loss(\n            [\n                'rpn_bbox_pred_fpn' + slvl, 'rpn_bbox_targets_fpn' + slvl,\n                'rpn_bbox_inside_weights_fpn' + slvl,\n                'rpn_bbox_outside_weights_fpn' + slvl\n            ],\n            'loss_rpn_bbox_fpn' + slvl,\n            beta=1. / 9.,\n            scale=model.GetLossScale(),\n        )\n        loss_gradients.update(\n            blob_utils.\n            get_loss_gradients(model, [loss_rpn_cls_fpn, loss_rpn_bbox_fpn])\n        )\n        model.AddLosses(['loss_rpn_cls_fpn' + slvl, 'loss_rpn_bbox_fpn' + slvl])\n    return loss_gradients\n\n\n# ---------------------------------------------------------------------------- #\n# Helper functions for working with multilevel FPN RoIs\n# ---------------------------------------------------------------------------- #\n\ndef map_rois_to_fpn_levels(rois, k_min, k_max):\n    \"\"\"Determine which FPN level each RoI in a set of RoIs should map to based\n    on the heuristic in the FPN paper.\n    \"\"\"\n    # Compute level ids\n    s = np.sqrt(box_utils.boxes_area(rois))\n    s0 = cfg.FPN.ROI_CANONICAL_SCALE  # default: 224\n    lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL  # default: 4\n\n    # Eqn.(1) in FPN paper\n    target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6))\n    target_lvls = 
np.clip(target_lvls, k_min, k_max)\n    return target_lvls\n\n\ndef add_multilevel_roi_blobs(\n    blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max\n):\n    \"\"\"Add RoI blobs for multiple FPN levels to the blobs dict.\n\n    blobs: a dict mapping from blob name to numpy ndarray\n    blob_prefix: name prefix to use for the FPN blobs\n    rois: the source rois as a 2D numpy array of shape (N, 5) where each row is\n      an roi and the columns encode (batch_idx, x1, y1, x2, y2)\n    target_lvls: numpy array of shape (N, ) indicating which FPN level each roi\n      in rois should be assigned to\n    lvl_min: the finest (highest resolution) FPN level (e.g., 2)\n    lvl_max: the coarest (lowest resolution) FPN level (e.g., 6)\n    \"\"\"\n    rois_idx_order = np.empty((0, ))\n    rois_stacked = np.zeros((0, 5), dtype=np.float32)  # for assert\n    for lvl in range(lvl_min, lvl_max + 1):\n        idx_lvl = np.where(target_lvls == lvl)[0]\n        blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :]\n        rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))\n        rois_stacked = np.vstack(\n            [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]]\n        )\n    rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False)\n    blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore\n    # Sanity check that restore order is correct\n    assert (rois_stacked[rois_idx_restore] == rois).all()\n\n\n# ---------------------------------------------------------------------------- #\n# FPN level info for stages 5, 4, 3, 2 for select models (more can be added)\n# ---------------------------------------------------------------------------- #\n\nFpnLevelInfo = collections.namedtuple(\n    'FpnLevelInfo',\n    ['blobs', 'dims', 'spatial_scales']\n)\n\n\ndef fpn_level_info_ResNet50_conv5():\n    return FpnLevelInfo(\n        blobs=('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum'),\n        dims=(2048, 1024, 512, 256),\n        
spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.)\n    )\n\n\ndef fpn_level_info_ResNet101_conv5():\n    return FpnLevelInfo(\n        blobs=('res5_2_sum', 'res4_22_sum', 'res3_3_sum', 'res2_2_sum'),\n        dims=(2048, 1024, 512, 256),\n        spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.)\n    )\n\n\ndef fpn_level_info_ResNet152_conv5():\n    return FpnLevelInfo(\n        blobs=('res5_2_sum', 'res4_35_sum', 'res3_7_sum', 'res2_2_sum'),\n        dims=(2048, 1024, 512, 256),\n        spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.)\n    )\n"
  },
  {
    "path": "detectron/modeling/ResNet.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Implements ResNet and ResNeXt.\n\nSee: https://arxiv.org/abs/1512.03385, https://arxiv.org/abs/1611.05431.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.net import get_group_gn\n\n\n# ---------------------------------------------------------------------------- #\n# Bits for specific architectures (ResNet50, ResNet101, ...)\n# ---------------------------------------------------------------------------- #\n\n\ndef add_ResNet50_conv4_body(model):\n    return add_ResNet_convX_body(model, (3, 4, 6))\n\n\ndef add_ResNet50_conv5_body(model):\n    return add_ResNet_convX_body(model, (3, 4, 6, 3))\n\n\ndef add_ResNet101_conv4_body(model):\n    return add_ResNet_convX_body(model, (3, 4, 23))\n\n\ndef add_ResNet101_conv5_body(model):\n    return add_ResNet_convX_body(model, (3, 4, 23, 3))\n\n\ndef add_ResNet152_conv5_body(model):\n    return add_ResNet_convX_body(model, (3, 8, 36, 3))\n\n\n# ---------------------------------------------------------------------------- #\n# Generic ResNet components\n# ---------------------------------------------------------------------------- #\n\n\ndef add_stage(\n   
 model,\n    prefix,\n    blob_in,\n    n,\n    dim_in,\n    dim_out,\n    dim_inner,\n    dilation,\n    stride_init=2\n):\n    \"\"\"Add a ResNet stage to the model by stacking n residual blocks.\"\"\"\n    # e.g., prefix = res2\n    for i in range(n):\n        blob_in = add_residual_block(\n            model,\n            '{}_{}'.format(prefix, i),\n            blob_in,\n            dim_in,\n            dim_out,\n            dim_inner,\n            dilation,\n            stride_init,\n            # Not using inplace for the last block;\n            # it may be fetched externally or used by FPN\n            inplace_sum=i < n - 1\n        )\n        dim_in = dim_out\n    return blob_in, dim_in\n\n\ndef add_ResNet_convX_body(model, block_counts):\n    \"\"\"Add a ResNet body from input data up through the res5 (aka conv5) stage.\n    The final res5/conv5 stage may be optionally excluded (hence convX, where\n    X = 4 or 5).\"\"\"\n    freeze_at = cfg.TRAIN.FREEZE_AT\n    assert freeze_at in [0, 2, 3, 4, 5]\n\n    # add the stem (by default, conv1 and pool1 with bn; can support gn)\n    p, dim_in = globals()[cfg.RESNETS.STEM_FUNC](model, 'data')\n\n    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP\n    (n1, n2, n3) = block_counts[:3]\n    s, dim_in = add_stage(model, 'res2', p, n1, dim_in, 256, dim_bottleneck, 1)\n    if freeze_at == 2:\n        model.StopGradient(s, s)\n    s, dim_in = add_stage(\n        model, 'res3', s, n2, dim_in, 512, dim_bottleneck * 2, 1\n    )\n    if freeze_at == 3:\n        model.StopGradient(s, s)\n    s, dim_in = add_stage(\n        model, 'res4', s, n3, dim_in, 1024, dim_bottleneck * 4, 1\n    )\n    if freeze_at == 4:\n        model.StopGradient(s, s)\n    if len(block_counts) == 4:\n        n4 = block_counts[3]\n        s, dim_in = add_stage(\n            model, 'res5', s, n4, dim_in, 2048, dim_bottleneck * 8,\n            cfg.RESNETS.RES5_DILATION\n        )\n        if freeze_at == 5:\n            
model.StopGradient(s, s)\n        return s, dim_in, 1. / 32. * cfg.RESNETS.RES5_DILATION\n    else:\n        return s, dim_in, 1. / 16.\n\n\ndef add_ResNet_roi_conv5_head(model, blob_in, dim_in, spatial_scale):\n    \"\"\"Adds an RoI feature transformation (e.g., RoI pooling) followed by a\n    res5/conv5 head applied to each RoI.\"\"\"\n    # TODO(rbg): This contains Fast R-CNN specific config options making it non-\n    # reusable; make this more generic with model-specific wrappers\n    model.RoIFeatureTransform(\n        blob_in,\n        'pool5',\n        blob_rois='rois',\n        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,\n        resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION,\n        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP\n    stride_init = int(cfg.FAST_RCNN.ROI_XFORM_RESOLUTION / 7)\n    s, dim_in = add_stage(\n        model, 'res5', 'pool5', 3, dim_in, 2048, dim_bottleneck * 8, 1,\n        stride_init\n    )\n    s = model.AveragePool(s, 'res5_pool', kernel=7)\n    return s, 2048\n\n\ndef add_residual_block(\n    model,\n    prefix,\n    blob_in,\n    dim_in,\n    dim_out,\n    dim_inner,\n    dilation,\n    stride_init=2,\n    inplace_sum=False\n):\n    \"\"\"Add a residual block to the model.\"\"\"\n    # prefix = res<stage>_<sub_stage>, e.g., res2_3\n\n    # Max pooling is performed prior to the first stage (which is uniquely\n    # distinguished by dim_in = 64), thus we keep stride = 1 for the first stage\n    stride = stride_init if (\n        dim_in != dim_out and dim_in != 64 and dilation == 1\n    ) else 1\n\n    # transformation blob\n    tr = globals()[cfg.RESNETS.TRANS_FUNC](\n        model,\n        blob_in,\n        dim_in,\n        dim_out,\n        stride,\n        prefix,\n        dim_inner,\n        group=cfg.RESNETS.NUM_GROUPS,\n        dilation=dilation\n    )\n\n    # sum -> ReLU\n    # shortcut function: 
by default using bn; support gn\n    add_shortcut = globals()[cfg.RESNETS.SHORTCUT_FUNC]\n    sc = add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride)\n    if inplace_sum:\n        s = model.net.Sum([tr, sc], tr)\n    else:\n        s = model.net.Sum([tr, sc], prefix + '_sum')\n\n    return model.Relu(s, s)\n\n\n# ------------------------------------------------------------------------------\n# various shortcuts (may expand and may consider a new helper)\n# ------------------------------------------------------------------------------\n\n\ndef basic_bn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):\n    \"\"\" For a pre-trained network that used BN. An AffineChannel op replaces BN\n    during fine-tuning.\n    \"\"\"\n\n    if dim_in == dim_out:\n        return blob_in\n\n    c = model.Conv(\n        blob_in,\n        prefix + '_branch1',\n        dim_in,\n        dim_out,\n        kernel=1,\n        stride=stride,\n        no_bias=1\n    )\n    return model.AffineChannel(c, prefix + '_branch1_bn', dim=dim_out)\n\n\ndef basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):\n    if dim_in == dim_out:\n        return blob_in\n\n    # output name is prefix + '_branch1_gn'\n    return model.ConvGN(\n        blob_in,\n        prefix + '_branch1',\n        dim_in,\n        dim_out,\n        kernel=1,\n        group_gn=get_group_gn(dim_out),\n        stride=stride,\n        pad=0,\n        group=1,\n    )\n\n\n# ------------------------------------------------------------------------------\n# various stems (may expand and may consider a new helper)\n# ------------------------------------------------------------------------------\n\n\ndef basic_bn_stem(model, data, **kwargs):\n    \"\"\"Add a basic ResNet stem. 
For a pre-trained network that used BN.\n    An AffineChannel op replaces BN during fine-tuning.\n    \"\"\"\n\n    dim = 64\n    p = model.Conv(data, 'conv1', 3, dim, 7, pad=3, stride=2, no_bias=1)\n    p = model.AffineChannel(p, 'res_conv1_bn', dim=dim, inplace=True)\n    p = model.Relu(p, p)\n    p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)\n    return p, dim\n\n\ndef basic_gn_stem(model, data, **kwargs):\n    \"\"\"Add a basic ResNet stem (using GN)\"\"\"\n\n    dim = 64\n    p = model.ConvGN(\n        data, 'conv1', 3, dim, 7, group_gn=get_group_gn(dim), pad=3, stride=2\n    )\n    p = model.Relu(p, p)\n    p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)\n    return p, dim\n\n\n# ------------------------------------------------------------------------------\n# various transformations (may expand and may consider a new helper)\n# ------------------------------------------------------------------------------\n\n\ndef bottleneck_transformation(\n    model,\n    blob_in,\n    dim_in,\n    dim_out,\n    stride,\n    prefix,\n    dim_inner,\n    dilation=1,\n    group=1\n):\n    \"\"\"Add a bottleneck transformation to the model.\"\"\"\n    # In original resnet, stride=2 is on 1x1.\n    # In fb.torch resnet, stride=2 is on 3x3.\n    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)\n\n    # conv 1x1 -> BN -> ReLU\n    cur = model.ConvAffine(\n        blob_in,\n        prefix + '_branch2a',\n        dim_in,\n        dim_inner,\n        kernel=1,\n        stride=str1x1,\n        pad=0,\n        inplace=True\n    )\n    cur = model.Relu(cur, cur)\n\n    # conv 3x3 -> BN -> ReLU\n    cur = model.ConvAffine(\n        cur,\n        prefix + '_branch2b',\n        dim_inner,\n        dim_inner,\n        kernel=3,\n        stride=str3x3,\n        pad=1 * dilation,\n        dilation=dilation,\n        group=group,\n        inplace=True\n    )\n    cur = model.Relu(cur, cur)\n\n    # conv 1x1 -> BN (no ReLU)\n    # NB: for now this 
AffineChannel op cannot be in-place due to a bug in C2\n    # gradient computation for graphs like this\n    cur = model.ConvAffine(\n        cur,\n        prefix + '_branch2c',\n        dim_inner,\n        dim_out,\n        kernel=1,\n        stride=1,\n        pad=0,\n        inplace=False\n    )\n    return cur\n\n\ndef bottleneck_gn_transformation(\n    model,\n    blob_in,\n    dim_in,\n    dim_out,\n    stride,\n    prefix,\n    dim_inner,\n    dilation=1,\n    group=1\n):\n    \"\"\"Add a bottleneck transformation with GroupNorm to the model.\"\"\"\n    # In original resnet, stride=2 is on 1x1.\n    # In fb.torch resnet, stride=2 is on 3x3.\n    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)\n\n    # conv 1x1 -> GN -> ReLU\n    cur = model.ConvGN(\n        blob_in,\n        prefix + '_branch2a',\n        dim_in,\n        dim_inner,\n        kernel=1,\n        group_gn=get_group_gn(dim_inner),\n        stride=str1x1,\n        pad=0,\n    )\n    cur = model.Relu(cur, cur)\n\n    # conv 3x3 -> GN -> ReLU\n    cur = model.ConvGN(\n        cur,\n        prefix + '_branch2b',\n        dim_inner,\n        dim_inner,\n        kernel=3,\n        group_gn=get_group_gn(dim_inner),\n        stride=str3x3,\n        pad=1 * dilation,\n        dilation=dilation,\n        group=group,\n    )\n    cur = model.Relu(cur, cur)\n\n    # conv 1x1 -> GN (no ReLU)\n    cur = model.ConvGN(\n        cur,\n        prefix + '_branch2c',\n        dim_inner,\n        dim_out,\n        kernel=1,\n        group_gn=get_group_gn(dim_out),\n        stride=1,\n        pad=0,\n    )\n    return cur\n"
  },
  {
    "path": "detectron/modeling/VGG16.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"VGG16 from https://arxiv.org/abs/1409.1556.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.core.config import cfg\n\n\ndef add_VGG16_conv5_body(model):\n    model.Conv('data', 'conv1_1', 3, 64, 3, pad=1, stride=1)\n    model.Relu('conv1_1', 'conv1_1')\n    model.Conv('conv1_1', 'conv1_2', 64, 64, 3, pad=1, stride=1)\n    model.Relu('conv1_2', 'conv1_2')\n    model.MaxPool('conv1_2', 'pool1', kernel=2, pad=0, stride=2)\n    model.Conv('pool1', 'conv2_1', 64, 128, 3, pad=1, stride=1)\n    model.Relu('conv2_1', 'conv2_1')\n    model.Conv('conv2_1', 'conv2_2', 128, 128, 3, pad=1, stride=1)\n    model.Relu('conv2_2', 'conv2_2')\n    model.MaxPool('conv2_2', 'pool2', kernel=2, pad=0, stride=2)\n    model.StopGradient('pool2', 'pool2')\n    model.Conv('pool2', 'conv3_1', 128, 256, 3, pad=1, stride=1)\n    model.Relu('conv3_1', 'conv3_1')\n    model.Conv('conv3_1', 'conv3_2', 256, 256, 3, pad=1, stride=1)\n    model.Relu('conv3_2', 'conv3_2')\n    model.Conv('conv3_2', 'conv3_3', 256, 256, 3, pad=1, stride=1)\n    model.Relu('conv3_3', 'conv3_3')\n    model.MaxPool('conv3_3', 'pool3', kernel=2, pad=0, stride=2)\n    model.Conv('pool3', 
'conv4_1', 256, 512, 3, pad=1, stride=1)\n    model.Relu('conv4_1', 'conv4_1')\n    model.Conv('conv4_1', 'conv4_2', 512, 512, 3, pad=1, stride=1)\n    model.Relu('conv4_2', 'conv4_2')\n    model.Conv('conv4_2', 'conv4_3', 512, 512, 3, pad=1, stride=1)\n    model.Relu('conv4_3', 'conv4_3')\n    model.MaxPool('conv4_3', 'pool4', kernel=2, pad=0, stride=2)\n    model.Conv('pool4', 'conv5_1', 512, 512, 3, pad=1, stride=1)\n    model.Relu('conv5_1', 'conv5_1')\n    model.Conv('conv5_1', 'conv5_2', 512, 512, 3, pad=1, stride=1)\n    model.Relu('conv5_2', 'conv5_2')\n    model.Conv('conv5_2', 'conv5_3', 512, 512, 3, pad=1, stride=1)\n    blob_out = model.Relu('conv5_3', 'conv5_3')\n    return blob_out, 512, 1. / 16.\n\n\ndef add_VGG16_roi_fc_head(model, blob_in, dim_in, spatial_scale):\n    model.RoIFeatureTransform(\n        blob_in,\n        'pool5',\n        blob_rois='rois',\n        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,\n        resolution=7,\n        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n    model.FC('pool5', 'fc6', dim_in * 7 * 7, 4096)\n    model.Relu('fc6', 'fc6')\n    model.FC('fc6', 'fc7', 4096, 4096)\n    blob_out = model.Relu('fc7', 'fc7')\n    return blob_out, 4096\n"
  },
  {
    "path": "detectron/modeling/VGG_CNN_M_1024.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"VGG_CNN_M_1024 from https://arxiv.org/abs/1405.3531.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.core.config import cfg\n\n\ndef add_VGG_CNN_M_1024_conv5_body(model):\n    model.Conv('data', 'conv1', 3, 96, 7, pad=0, stride=2)\n    model.Relu('conv1', 'conv1')\n    model.LRN('conv1', 'norm1', size=5, alpha=0.0005, beta=0.75, bias=2.)\n    model.MaxPool('norm1', 'pool1', kernel=3, pad=0, stride=2)\n    model.StopGradient('pool1', 'pool1')\n    # No updates at conv1 and below (norm1 and pool1 have no params,\n    # so we can stop gradients before them, too)\n    model.Conv('pool1', 'conv2', 96, 256, 5, pad=0, stride=2)\n    model.Relu('conv2', 'conv2')\n    model.LRN('conv2', 'norm2', size=5, alpha=0.0005, beta=0.75, bias=2.)\n    model.MaxPool('norm2', 'pool2', kernel=3, pad=0, stride=2)\n    model.Conv('pool2', 'conv3', 256, 512, 3, pad=1, stride=1)\n    model.Relu('conv3', 'conv3')\n    model.Conv('conv3', 'conv4', 512, 512, 3, pad=1, stride=1)\n    model.Relu('conv4', 'conv4')\n    model.Conv('conv4', 'conv5', 512, 512, 3, pad=1, stride=1)\n    blob_out = model.Relu('conv5', 'conv5')\n    return blob_out, 512, 1. 
/ 16.\n\n\ndef add_VGG_CNN_M_1024_roi_fc_head(model, blob_in, dim_in, spatial_scale):\n    model.RoIFeatureTransform(\n        blob_in,\n        'pool5',\n        blob_rois='rois',\n        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,\n        resolution=6,\n        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n    model.FC('pool5', 'fc6', dim_in * 6 * 6, 4096)\n    model.Relu('fc6', 'fc6')\n    model.FC('fc6', 'fc7', 4096, 1024)\n    blob_out = model.Relu('fc7', 'fc7')\n    return blob_out, 1024\n"
  },
  {
    "path": "detectron/modeling/__init__.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n"
  },
  {
    "path": "detectron/modeling/detector.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Defines DetectionModelHelper, the class that represents a Detectron model.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport logging\n\nfrom caffe2.python import cnn\nfrom caffe2.python import core\nfrom caffe2.python import workspace\nfrom caffe2.python.modeling import initializers\nfrom caffe2.python.modeling.parameter_info import ParameterTags\n\nfrom detectron.core.config import cfg\nfrom detectron.ops.collect_and_distribute_fpn_rpn_proposals \\\n    import CollectAndDistributeFpnRpnProposalsOp\nfrom detectron.ops.generate_proposal_labels import GenerateProposalLabelsOp\nfrom detectron.ops.generate_proposals import GenerateProposalsOp\nimport detectron.roi_data.fast_rcnn as fast_rcnn_roi_data\nimport detectron.utils.c2 as c2_utils\n\nlogger = logging.getLogger(__name__)\n\n\nclass DetectionModelHelper(cnn.CNNModelHelper):\n    def __init__(self, **kwargs):\n        # Handle args specific to the DetectionModelHelper, others pass through\n        # to CNNModelHelper\n        self.train = kwargs.get('train', False)\n        self.num_classes = kwargs.get('num_classes', -1)\n        assert self.num_classes > 0, 'num_classes must 
be > 0'\n        for k in ('train', 'num_classes'):\n            if k in kwargs:\n                del kwargs[k]\n        kwargs['order'] = 'NCHW'\n        # Defensively set cudnn_exhaustive_search to False in case the default\n        # changes in CNNModelHelper. The detection code uses variable size\n        # inputs that might not play nicely with cudnn_exhaustive_search.\n        kwargs['cudnn_exhaustive_search'] = False\n        super(DetectionModelHelper, self).__init__(**kwargs)\n        self.roi_data_loader = None\n        self.losses = []\n        self.metrics = []\n        self.do_not_update_params = []  # Param on this list are not updated\n        self.net.Proto().type = cfg.MODEL.EXECUTION_TYPE\n        self.net.Proto().num_workers = cfg.NUM_GPUS * 4\n        self.prev_use_cudnn = self.use_cudnn\n        self.gn_params = []  # Param on this list are GroupNorm parameters\n\n    def TrainableParams(self, gpu_id=-1):\n        \"\"\"Get the blob names for all trainable parameters, possibly filtered by\n        GPU id.\n        \"\"\"\n        return [\n            p for p in self.params\n            if (\n                p in self.param_to_grad and   # p has a gradient\n                p not in self.do_not_update_params and  # not on the blacklist\n                (gpu_id == -1 or  # filter for gpu assignment, if gpu_id set\n                 str(p).find('gpu_{}'.format(gpu_id)) == 0)\n            )]\n\n    def AffineChannel(self, blob_in, blob_out, dim, inplace=False):\n        \"\"\"Affine transformation to replace BN in networks where BN cannot be\n        used (e.g., because the minibatch size is too small).\n\n        The operations can be done in place to save memory.\n        \"\"\"\n        blob_out = blob_out or self.net.NextName()\n        param_prefix = blob_out\n\n        scale = self.create_param(\n            param_name=param_prefix + '_s',\n            initializer=initializers.Initializer(\"ConstantFill\", value=1.),\n            
tags=ParameterTags.WEIGHT,\n            shape=[dim, ],\n        )\n        bias = self.create_param(\n            param_name=param_prefix + '_b',\n            initializer=initializers.Initializer(\"ConstantFill\", value=0.),\n            tags=ParameterTags.BIAS,\n            shape=[dim, ],\n        )\n        if inplace:\n            return self.net.AffineChannel([blob_in, scale, bias], blob_in)\n        else:\n            return self.net.AffineChannel([blob_in, scale, bias], blob_out)\n\n    def GenerateProposals(self, blobs_in, blobs_out, anchors, spatial_scale):\n        \"\"\"Op for generating RPN proposals.\n\n        blobs_in:\n          - 'rpn_cls_probs': 4D tensor of shape (N, A, H, W), where N is the\n            number of minibatch images, A is the number of anchors per\n            locations, and (H, W) is the spatial size of the prediction grid.\n            Each value represents a \"probability of object\" rating in [0, 1].\n          - 'rpn_bbox_pred': 4D tensor of shape (N, 4 * A, H, W) of predicted\n            deltas for transforming anchor boxes into RPN proposals.\n          - 'im_info': 2D tensor of shape (N, 3) where the three columns encode\n            the input image's [height, width, scale]. Height and width are\n            for the input to the network, not the original image; scale is the\n            scale factor used to scale the original image to the network input\n            size.\n\n        blobs_out:\n          - 'rpn_rois': 2D tensor of shape (R, 5), for R RPN proposals where the\n            five columns encode [batch ind, x1, y1, x2, y2]. The boxes are\n            w.r.t. 
the network input, which is a *scaled* version of the\n            original image; these proposals must be scaled by 1 / scale (where\n            scale comes from im_info; see above) to transform it back to the\n            original input image coordinate system.\n          - 'rpn_roi_probs': 1D tensor of objectness probability scores\n            (extracted from rpn_cls_probs; see above).\n        \"\"\"\n        cfg_key = 'TRAIN' if self.train else 'TEST'\n\n        if cfg[cfg_key].GENERATE_PROPOSALS_ON_GPU:\n            rpn_pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N\n            rpn_post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N\n            rpn_nms_thresh = cfg[cfg_key].RPN_NMS_THRESH\n            rpn_min_size = float(cfg[cfg_key].RPN_MIN_SIZE)\n\n            input_name = str(blobs_in[0])\n            lvl = int(input_name[-1]) if input_name[-1].isdigit() else None\n            anchors_name = 'anchors{}'.format(lvl) if lvl else 'anchors'\n\n            for i in range(cfg.NUM_GPUS):\n                with c2_utils.CudaScope(i):\n                    workspace.FeedBlob(\n                        'gpu_{}/{}'.format(i, anchors_name),\n                        anchors.astype(np.float32))\n\n            self.net.GenerateProposals(\n                blobs_in + [anchors_name],\n                blobs_out,\n                spatial_scale=spatial_scale,\n                pre_nms_topN=rpn_pre_nms_topN,\n                post_nms_topN=rpn_post_nms_topN,\n                nms_thresh=rpn_nms_thresh,\n                min_size=rpn_min_size,\n            )\n        else:\n            name = 'GenerateProposalsOp:' + ','.join([str(b) for b in blobs_in])\n            # spatial_scale passed to the Python op is only used in\n            # convert_pkl_to_pb\n            self.net.Python(\n                GenerateProposalsOp(anchors, spatial_scale, self.train).forward\n            )(blobs_in, blobs_out, name=name, spatial_scale=spatial_scale)\n\n        return blobs_out\n\n    def 
GenerateProposalLabels(self, blobs_in):\n        \"\"\"Op for generating training labels for RPN proposals. This is used\n        when training RPN jointly with Fast/Mask R-CNN (as in end-to-end\n        Faster R-CNN training).\n\n        blobs_in:\n          - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals\n          - 'roidb': roidb entries that will be labeled\n          - 'im_info': See GenerateProposals doc.\n\n        blobs_out:\n          - (variable set of blobs): returns whatever blobs are required for\n            training the model. It does this by querying the data loader for\n            the list of blobs that are needed.\n        \"\"\"\n        name = 'GenerateProposalLabelsOp:' + ','.join(\n            [str(b) for b in blobs_in]\n        )\n\n        # The list of blobs is not known before run-time because it depends on\n        # the specific model being trained. Query the data loader to get the\n        # list of output blob names.\n        blobs_out = fast_rcnn_roi_data.get_fast_rcnn_blob_names(\n            is_training=self.train\n        )\n        blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]\n\n        self.net.Python(GenerateProposalLabelsOp().forward)(\n            blobs_in, blobs_out, name=name\n        )\n        return blobs_out\n\n    def CollectAndDistributeFpnRpnProposals(self):\n        \"\"\"Merge RPN proposals generated at multiple FPN levels and then\n        distribute those proposals to their appropriate FPN levels. 
An anchor\n        at one FPN level may predict an RoI that will map to another level,\n        hence the need to redistribute the proposals.\n\n        This function assumes standard blob names for input and output blobs.\n\n        Input blobs: [rpn_rois_fpn<min>, ..., rpn_rois_fpn<max>,\n                      rpn_roi_probs_fpn<min>, ..., rpn_roi_probs_fpn<max>]\n          - rpn_rois_fpn<i> are the RPN proposals for FPN level i; see rpn_rois\n            documentation from GenerateProposals.\n          - rpn_roi_probs_fpn<i> are the RPN objectness probabilities for FPN\n            level i; see rpn_roi_probs documentation from GenerateProposals.\n\n        If used during training, then the input blobs will also include:\n          [roidb, im_info] (see GenerateProposalLabels).\n\n        Output blobs: [rois_fpn<min>, ..., rois_fpn<max>, rois,\n                       rois_idx_restore]\n          - rois_fpn<i> are the RPN proposals for FPN level i\n          - rois_idx_restore is a permutation on the concatenation of all\n            rois_fpn<i>, i=min...max, such that when applied the RPN RoIs are\n            restored to their original order in the input blobs.\n\n        If used during training, then the output blobs will also include:\n          [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights].\n        \"\"\"\n        k_max = cfg.FPN.RPN_MAX_LEVEL\n        k_min = cfg.FPN.RPN_MIN_LEVEL\n\n        # Prepare input blobs\n        rois_names = ['rpn_rois_fpn' + str(l) for l in range(k_min, k_max + 1)]\n        score_names = [\n            'rpn_roi_probs_fpn' + str(l) for l in range(k_min, k_max + 1)\n        ]\n        blobs_in = rois_names + score_names\n        if self.train:\n            blobs_in += ['roidb', 'im_info']\n        blobs_in = [core.ScopedBlobReference(b) for b in blobs_in]\n        name = 'CollectAndDistributeFpnRpnProposalsOp:' + ','.join(\n            [str(b) for b in blobs_in]\n        )\n\n        # Prepare output blobs\n      
  blobs_out = fast_rcnn_roi_data.get_fast_rcnn_blob_names(\n            is_training=self.train\n        )\n        blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]\n\n        outputs = self.net.Python(\n            CollectAndDistributeFpnRpnProposalsOp(self.train).forward\n        )(blobs_in, blobs_out, name=name)\n\n        return outputs\n\n    def DropoutIfTraining(self, blob_in, dropout_rate):\n        \"\"\"Add dropout to blob_in if the model is in training mode and\n        dropout_rate is > 0.\"\"\"\n        blob_out = blob_in\n        if self.train and dropout_rate > 0:\n            blob_out = self.Dropout(\n                blob_in, blob_in, ratio=dropout_rate, is_test=False\n            )\n        return blob_out\n\n    def RoIFeatureTransform(\n        self,\n        blobs_in,\n        blob_out,\n        blob_rois='rois',\n        method='RoIPoolF',\n        resolution=7,\n        spatial_scale=1. / 16.,\n        sampling_ratio=0\n    ):\n        \"\"\"Add the specified RoI pooling method. 
The sampling_ratio argument\n        is supported for some, but not all, RoI transform methods.\n\n        RoIFeatureTransform abstracts away:\n          - Use of FPN or not\n          - Specifics of the transform method\n        \"\"\"\n        assert method in {'RoIPoolF', 'RoIAlign'}, \\\n            'Unknown pooling method: {}'.format(method)\n        has_argmax = (method == 'RoIPoolF')\n        if isinstance(blobs_in, list):\n            # FPN case: add RoIFeatureTransform to each FPN level\n            k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid\n            k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid\n            assert len(blobs_in) == k_max - k_min + 1\n            bl_out_list = []\n            for lvl in range(k_min, k_max + 1):\n                bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order\n                sc = spatial_scale[k_max - lvl]  # in reversed order\n                bl_rois = blob_rois + '_fpn' + str(lvl)\n                bl_out = blob_out + '_fpn' + str(lvl)\n                bl_out_list.append(bl_out)\n                bl_argmax = ['_argmax_' + bl_out] if has_argmax else []\n                self.net.__getattr__(method)(\n                    [bl_in, bl_rois], [bl_out] + bl_argmax,\n                    pooled_w=resolution,\n                    pooled_h=resolution,\n                    spatial_scale=sc,\n                    sampling_ratio=sampling_ratio\n                )\n            # The pooled features from all levels are concatenated along the\n            # batch dimension into a single 4D tensor.\n            xform_shuffled, _ = self.net.Concat(\n                bl_out_list, [blob_out + '_shuffled', '_concat_' + blob_out],\n                axis=0\n            )\n            # Unshuffle to match rois from dataloader\n            restore_bl = blob_rois + '_idx_restore_int32'\n            xform_out = self.net.BatchPermutation(\n                [xform_shuffled, restore_bl], blob_out\n            )\n   
     else:\n            # Single feature level\n            bl_argmax = ['_argmax_' + blob_out] if has_argmax else []\n            # sampling_ratio is ignored for RoIPoolF\n            xform_out = self.net.__getattr__(method)(\n                [blobs_in, blob_rois], [blob_out] + bl_argmax,\n                pooled_w=resolution,\n                pooled_h=resolution,\n                spatial_scale=spatial_scale,\n                sampling_ratio=sampling_ratio\n            )\n        # Only return the first blob (the transformed features)\n        return xform_out[0] if isinstance(xform_out, tuple) else xform_out\n\n    def ConvShared(\n        self,\n        blob_in,\n        blob_out,\n        dim_in,\n        dim_out,\n        kernel,\n        weight=None,\n        bias=None,\n        **kwargs\n    ):\n        \"\"\"Add conv op that shares weights and/or biases with another conv op.\n        \"\"\"\n        use_bias = (\n            False if ('no_bias' in kwargs and kwargs['no_bias']) else True\n        )\n\n        if self.use_cudnn:\n            kwargs['engine'] = 'CUDNN'\n            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search\n            if self.ws_nbytes_limit:\n                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit\n\n        if use_bias:\n            blobs_in = [blob_in, weight, bias]\n        else:\n            blobs_in = [blob_in, weight]\n\n        if 'no_bias' in kwargs:\n            del kwargs['no_bias']\n\n        return self.net.Conv(\n            blobs_in, blob_out, kernel=kernel, order=self.order, **kwargs\n        )\n\n    def BilinearInterpolation(\n        self, blob_in, blob_out, dim_in, dim_out, up_scale\n    ):\n        \"\"\"Bilinear interpolation in space of scale.\n\n        Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale\n\n        Adapted from the CVPR'15 FCN code.\n        See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py\n        \"\"\"\n        assert dim_in == 
dim_out\n        assert up_scale % 2 == 0, 'Scale should be even'\n\n        def upsample_filt(size):\n            factor = (size + 1) // 2\n            if size % 2 == 1:\n                center = factor - 1\n            else:\n                center = factor - 0.5\n            og = np.ogrid[:size, :size]\n            return ((1 - abs(og[0] - center) / factor) *\n                    (1 - abs(og[1] - center) / factor))\n\n        kernel_size = up_scale * 2\n        bil_filt = upsample_filt(kernel_size)\n\n        kernel = np.zeros(\n            (dim_in, dim_out, kernel_size, kernel_size), dtype=np.float32\n        )\n        kernel[range(dim_out), range(dim_in), :, :] = bil_filt\n\n        blob = self.ConvTranspose(\n            blob_in,\n            blob_out,\n            dim_in,\n            dim_out,\n            kernel_size,\n            stride=int(up_scale),\n            pad=int(up_scale / 2),\n            weight_init=('GivenTensorFill', {'values': kernel}),\n            bias_init=('ConstantFill', {'value': 0.})\n        )\n        self.do_not_update_params.append(self.weights[-1])\n        self.do_not_update_params.append(self.biases[-1])\n        return blob\n\n    def ConvAffine(  # args in the same order of Conv()\n        self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,\n        group=1, dilation=1,\n        weight_init=None,\n        bias_init=None,\n        suffix='_bn',\n        inplace=False\n    ):\n        \"\"\"ConvAffine adds a Conv op followed by an AffineChannel op (which\n        replaces BN during fine tuning).\n        \"\"\"\n        conv_blob = self.Conv(\n            blob_in,\n            prefix,\n            dim_in,\n            dim_out,\n            kernel,\n            stride=stride,\n            pad=pad,\n            group=group,\n            dilation=dilation,\n            weight_init=weight_init,\n            bias_init=bias_init,\n            no_bias=1\n        )\n        blob_out = self.AffineChannel(\n            
conv_blob, prefix + suffix, dim=dim_out, inplace=inplace\n        )\n        return blob_out\n\n    def ConvGN(  # args in the same order of Conv()\n        self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,\n        group_gn,  # num of groups in gn\n        group=1, dilation=1,\n        weight_init=None,\n        bias_init=None,\n        suffix='_gn',\n        no_conv_bias=1,\n    ):\n        \"\"\"ConvGN adds a Conv op followed by a GroupNorm op,\n        including learnable scale/bias (gamma/beta)\n        \"\"\"\n        conv_blob = self.Conv(\n            blob_in,\n            prefix,\n            dim_in,\n            dim_out,\n            kernel,\n            stride=stride,\n            pad=pad,\n            group=group,\n            dilation=dilation,\n            weight_init=weight_init,\n            bias_init=bias_init,\n            no_bias=no_conv_bias)\n\n        if group_gn < 1:\n            logger.warning(\n                'Layer: {} (dim {}): '\n                'group_gn < 1; reset to 1.'.format(prefix, dim_in)\n            )\n            group_gn = 1\n\n        blob_out = self.SpatialGN(\n            conv_blob, prefix + suffix,\n            dim_out, group=group_gn,  # op's arg name is \"group\"\n            epsilon=cfg.GROUP_NORM.EPSILON,)\n\n        self.gn_params.append(self.params[-1])  # add gn's bias to list\n        self.gn_params.append(self.params[-2])  # add gn's scale to list\n        return blob_out\n\n    def DisableCudnn(self):\n        self.prev_use_cudnn = self.use_cudnn\n        self.use_cudnn = False\n\n    def RestorePreviousUseCudnn(self):\n        prev_use_cudnn = self.use_cudnn\n        self.use_cudnn = self.prev_use_cudnn\n        self.prev_use_cudnn = prev_use_cudnn\n\n    def UpdateWorkspaceLr(self, cur_iter, new_lr):\n        \"\"\"Updates the model's current learning rate and the workspace (learning\n        rate and update history/momentum blobs).\n        \"\"\"\n        # The workspace is the one source of truth 
for the lr\n        # The lr is always the same on all GPUs\n        cur_lr = workspace.FetchBlob('gpu_0/lr')[0]\n        # There are no type conversions between the lr in Python and the lr in\n        # the GPU (both are float32), so exact comparison is ok\n        if cur_lr != new_lr:\n            ratio = _get_lr_change_ratio(cur_lr, new_lr)\n            if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD:\n                logger.info(\n                    'Changing learning rate {:.6f} -> {:.6f} at iter {:d}'.\n                    format(cur_lr, new_lr, cur_iter))\n            self._SetNewLr(cur_lr, new_lr)\n        return new_lr\n\n    def _SetNewLr(self, cur_lr, new_lr):\n        \"\"\"Do the actual work of updating the model and workspace blobs.\n        \"\"\"\n        for i in range(cfg.NUM_GPUS):\n            with c2_utils.CudaScope(i):\n                workspace.FeedBlob(\n                    'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))\n        ratio = _get_lr_change_ratio(cur_lr, new_lr)\n        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \\\n                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:\n            self._CorrectMomentum(new_lr / cur_lr)\n\n    def _CorrectMomentum(self, correction):\n        \"\"\"The MomentumSGDUpdate op implements the update V as\n\n            V := mu * V + lr * grad,\n\n        where mu is the momentum factor, lr is the learning rate, and grad is\n        the stochastic gradient. 
Since V is not defined independently of the\n        learning rate (as it should ideally be), when the learning rate is\n        changed we should scale the update history V in order to make it\n        compatible in scale with lr * grad.\n        \"\"\"\n        logger.info(\n            'Scaling update history by {:.6f} (new lr / old lr)'.\n            format(correction))\n        for i in range(cfg.NUM_GPUS):\n            with c2_utils.CudaScope(i):\n                for param in self.TrainableParams(gpu_id=i):\n                    op = core.CreateOperator(\n                        'Scale', [param + '_momentum'], [param + '_momentum'],\n                        scale=correction)\n                    workspace.RunOperatorOnce(op)\n\n    def GetLossScale(self):\n        \"\"\"Allow a way to configure the loss scale dynamically.\n\n        This may be used in a distributed data parallel setting.\n        \"\"\"\n        return 1.0 / cfg.NUM_GPUS\n\n    def AddLosses(self, losses):\n        if not isinstance(losses, list):\n            losses = [losses]\n        # Conversion to str allows losses to include BlobReferences\n        losses = [c2_utils.UnscopeName(str(l)) for l in losses]\n        self.losses = list(set(self.losses + losses))\n\n    def AddMetrics(self, metrics):\n        if not isinstance(metrics, list):\n            metrics = [metrics]\n        self.metrics = list(set(self.metrics + metrics))\n\n\ndef _get_lr_change_ratio(cur_lr, new_lr):\n    eps = 1e-10\n    ratio = np.max(\n        (new_lr / np.max((cur_lr, eps)), cur_lr / np.max((new_lr, eps)))\n    )\n    return ratio\n"
  },
  {
    "path": "detectron/modeling/fast_rcnn_heads.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Various network \"heads\" for classification and bounding box prediction.\n\nThe design is as follows:\n\n... -> RoI ----\\                               /-> box cls output -> cls loss\n                -> RoIFeatureXform -> box head\n... -> Feature /                               \\-> box reg output -> reg loss\n       Map\n\nThe Fast R-CNN head produces a feature representation of the RoI for the purpose\nof bounding box classification and regression. 
The box output module converts\nthe feature representation into classification and regression predictions.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.c2 import const_fill\nfrom detectron.utils.c2 import gauss_fill\nfrom detectron.utils.net import get_group_gn\nimport detectron.utils.blob as blob_utils\n\n\n# ---------------------------------------------------------------------------- #\n# Fast R-CNN outputs and losses\n# ---------------------------------------------------------------------------- #\n\ndef add_fast_rcnn_outputs(model, blob_in, dim):\n    \"\"\"Add RoI classification and bounding box regression output ops.\"\"\"\n    # Box classification layer\n    model.FC(\n        blob_in,\n        'cls_score',\n        dim,\n        model.num_classes,\n        weight_init=gauss_fill(0.01),\n        bias_init=const_fill(0.0)\n    )\n    if not model.train:  # == if test\n        # Only add softmax when testing; during training the softmax is combined\n        # with the label cross entropy loss for numerical stability\n        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')\n    # Box regression layer\n    num_bbox_reg_classes = (\n        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes\n    )\n    model.FC(\n        blob_in,\n        'bbox_pred',\n        dim,\n        num_bbox_reg_classes * 4,\n        weight_init=gauss_fill(0.001),\n        bias_init=const_fill(0.0)\n    )\n\n\ndef add_fast_rcnn_losses(model):\n    \"\"\"Add losses for RoI classification and bounding box regression.\"\"\"\n    cls_prob, loss_cls = model.net.SoftmaxWithLoss(\n        ['cls_score', 'labels_int32'], ['cls_prob', 'loss_cls'],\n        scale=model.GetLossScale()\n    )\n    loss_bbox = model.net.SmoothL1Loss(\n        [\n            'bbox_pred', 'bbox_targets', 
'bbox_inside_weights',\n            'bbox_outside_weights'\n        ],\n        'loss_bbox',\n        scale=model.GetLossScale()\n    )\n    loss_gradients = blob_utils.get_loss_gradients(model, [loss_cls, loss_bbox])\n    model.Accuracy(['cls_prob', 'labels_int32'], 'accuracy_cls')\n    model.AddLosses(['loss_cls', 'loss_bbox'])\n    model.AddMetrics('accuracy_cls')\n    return loss_gradients\n\n\n# ---------------------------------------------------------------------------- #\n# Box heads\n# ---------------------------------------------------------------------------- #\n\ndef add_roi_2mlp_head(model, blob_in, dim_in, spatial_scale):\n    \"\"\"Add a ReLU MLP with two hidden layers.\"\"\"\n    hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM\n    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION\n    roi_feat = model.RoIFeatureTransform(\n        blob_in,\n        'roi_feat',\n        blob_rois='rois',\n        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,\n        resolution=roi_size,\n        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n    model.FC(roi_feat, 'fc6', dim_in * roi_size * roi_size, hidden_dim)\n    model.Relu('fc6', 'fc6')\n    model.FC('fc6', 'fc7', hidden_dim, hidden_dim)\n    model.Relu('fc7', 'fc7')\n    return 'fc7', hidden_dim\n\n\ndef add_roi_Xconv1fc_head(model, blob_in, dim_in, spatial_scale):\n    \"\"\"Add a X conv + 1fc head, as a reference if not using GroupNorm\"\"\"\n    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM\n    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION\n    roi_feat = model.RoIFeatureTransform(\n        blob_in,\n        'roi_feat',\n        blob_rois='rois',\n        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,\n        resolution=roi_size,\n        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n\n    current = roi_feat\n    for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):\n        current = model.Conv(\n            current, 'head_conv' + 
str(i + 1), dim_in, hidden_dim, 3,\n            stride=1, pad=1,\n            weight_init=('MSRAFill', {}),\n            bias_init=('ConstantFill', {'value': 0.}),\n            no_bias=0)\n        current = model.Relu(current, current)\n        dim_in = hidden_dim\n\n    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM\n    model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim)\n    model.Relu('fc6', 'fc6')\n    return 'fc6', fc_dim\n\n\ndef add_roi_Xconv1fc_gn_head(model, blob_in, dim_in, spatial_scale):\n    \"\"\"Add a X conv + 1fc head, with GroupNorm\"\"\"\n    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM\n    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION\n    roi_feat = model.RoIFeatureTransform(\n        blob_in, 'roi_feat',\n        blob_rois='rois',\n        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,\n        resolution=roi_size,\n        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n\n    current = roi_feat\n    for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):\n        current = model.ConvGN(\n            current, 'head_conv' + str(i + 1), dim_in, hidden_dim, 3,\n            group_gn=get_group_gn(hidden_dim),\n            stride=1, pad=1,\n            weight_init=('MSRAFill', {}),\n            bias_init=('ConstantFill', {'value': 0.}))\n        current = model.Relu(current, current)\n        dim_in = hidden_dim\n\n    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM\n    model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim)\n    model.Relu('fc6', 'fc6')\n    return 'fc6', fc_dim\n"
  },
  {
    "path": "detectron/modeling/generate_anchors.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick and Sean Bell\n# --------------------------------------------------------\n\nimport numpy as np\n\n# Verify that we compute the same anchors as Shaoqing's matlab implementation:\n#\n#    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat\n#    >> anchors\n#\n#    anchors =\n#\n#       -83   -39   100    56\n#      -175   -87   192   104\n#      -359  -183   376   200\n#       -55   -55    72    72\n#      -119  -119   136   136\n#      -247  -247   264   264\n#       -35   -79    52    96\n#       -79  -167    96   184\n#      -167  -343   184   360\n\n# array([[ -83.,  -39.,  100.,   56.],\n#        [-175.,  -87.,  192.,  104.],\n#        [-359., -183.,  376.,  200.],\n#        [ -55.,  -55.,   72.,   72.],\n#        [-119., -119.,  136.,  136.],\n#        [-247., -247.,  264.,  264.],\n#        [ -35.,  -79.,   52.,   96.],\n#        [ -79., -167.,   96.,  184.],\n#        [-167., -343.,  184.,  360.]])\n\n\ndef generate_anchors(\n    stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)\n):\n    \"\"\"Generates a matrix of 
anchor boxes in (x1, y1, x2, y2) format. Anchors\n    are centered on stride / 2, have (approximate) sqrt areas of the specified\n    sizes, and aspect ratios as given.\n    \"\"\"\n    return _generate_anchors(\n        stride,\n        np.array(sizes, dtype=float) / stride,\n        np.array(aspect_ratios, dtype=float)\n    )\n\n\ndef _generate_anchors(base_size, scales, aspect_ratios):\n    \"\"\"Generate anchor (reference) windows by enumerating aspect ratios X\n    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.\n    \"\"\"\n    anchor = np.array([1, 1, base_size, base_size], dtype=float) - 1\n    anchors = _ratio_enum(anchor, aspect_ratios)\n    anchors = np.vstack(\n        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]\n    )\n    return anchors\n\n\ndef _whctrs(anchor):\n    \"\"\"Return width, height, x center, and y center for an anchor (window).\"\"\"\n    w = anchor[2] - anchor[0] + 1\n    h = anchor[3] - anchor[1] + 1\n    x_ctr = anchor[0] + 0.5 * (w - 1)\n    y_ctr = anchor[1] + 0.5 * (h - 1)\n    return w, h, x_ctr, y_ctr\n\n\ndef _mkanchors(ws, hs, x_ctr, y_ctr):\n    \"\"\"Given a vector of widths (ws) and heights (hs) around a center\n    (x_ctr, y_ctr), output a set of anchors (windows).\n    \"\"\"\n    ws = ws[:, np.newaxis]\n    hs = hs[:, np.newaxis]\n    anchors = np.hstack(\n        (\n            x_ctr - 0.5 * (ws - 1),\n            y_ctr - 0.5 * (hs - 1),\n            x_ctr + 0.5 * (ws - 1),\n            y_ctr + 0.5 * (hs - 1)\n        )\n    )\n    return anchors\n\n\ndef _ratio_enum(anchor, ratios):\n    \"\"\"Enumerate a set of anchors for each aspect ratio wrt an anchor.\"\"\"\n    w, h, x_ctr, y_ctr = _whctrs(anchor)\n    size = w * h\n    size_ratios = size / ratios\n    ws = np.round(np.sqrt(size_ratios))\n    hs = np.round(ws * ratios)\n    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)\n    return anchors\n\n\ndef _scale_enum(anchor, scales):\n    \"\"\"Enumerate a set of anchors for each 
scale wrt an anchor.\"\"\"\n    w, h, x_ctr, y_ctr = _whctrs(anchor)\n    ws = w * scales\n    hs = h * scales\n    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)\n    return anchors\n"
  },
  {
    "path": "detectron/modeling/keypoint_rcnn_heads.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Various network \"heads\" for predicting keypoints in Mask R-CNN.\n\nThe design is as follows:\n\n... -> RoI ----\\\n                -> RoIFeatureXform -> keypoint head -> keypoint output -> loss\n... -> Feature /\n       Map\n\nThe keypoint head produces a feature representation of the RoI for the purpose\nof keypoint prediction. 
The keypoint output module converts the feature\nrepresentation into keypoint heatmaps.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.c2 import const_fill\nfrom detectron.utils.c2 import gauss_fill\nimport detectron.modeling.ResNet as ResNet\nimport detectron.utils.blob as blob_utils\n\n\n# ---------------------------------------------------------------------------- #\n# Keypoint R-CNN outputs and losses\n# ---------------------------------------------------------------------------- #\n\ndef add_keypoint_outputs(model, blob_in, dim):\n    \"\"\"Add Mask R-CNN keypoint specific outputs: keypoint heatmaps.\"\"\"\n    # NxKxHxW\n    upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)\n\n    if cfg.KRCNN.USE_DECONV:\n        # Apply ConvTranspose to the feature representation; results in 2x\n        # upsampling\n        blob_in = model.ConvTranspose(\n            blob_in,\n            'kps_deconv',\n            dim,\n            cfg.KRCNN.DECONV_DIM,\n            kernel=cfg.KRCNN.DECONV_KERNEL,\n            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),\n            stride=2,\n            weight_init=gauss_fill(0.01),\n            bias_init=const_fill(0.0)\n        )\n        model.Relu('kps_deconv', 'kps_deconv')\n        dim = cfg.KRCNN.DECONV_DIM\n\n    if upsample_heatmap:\n        blob_name = 'kps_score_lowres'\n    else:\n        blob_name = 'kps_score'\n\n    if cfg.KRCNN.USE_DECONV_OUTPUT:\n        # Use ConvTranspose to predict heatmaps; results in 2x upsampling\n        blob_out = model.ConvTranspose(\n            blob_in,\n            blob_name,\n            dim,\n            cfg.KRCNN.NUM_KEYPOINTS,\n            kernel=cfg.KRCNN.DECONV_KERNEL,\n            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),\n            stride=2,\n            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),\n         
   bias_init=const_fill(0.0)\n        )\n    else:\n        # Use Conv to predict heatmaps; does no upsampling\n        blob_out = model.Conv(\n            blob_in,\n            blob_name,\n            dim,\n            cfg.KRCNN.NUM_KEYPOINTS,\n            kernel=1,\n            pad=0,\n            stride=1,\n            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),\n            bias_init=const_fill(0.0)\n        )\n\n    if upsample_heatmap:\n        # Increase heatmap output size via bilinear upsampling\n        blob_out = model.BilinearInterpolation(\n            blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS,\n            cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE\n        )\n\n    return blob_out\n\n\ndef add_keypoint_losses(model):\n    \"\"\"Add Mask R-CNN keypoint specific losses.\"\"\"\n    # Reshape input from (N, K, H, W) to (NK, HW)\n    model.net.Reshape(\n        ['kps_score'], ['kps_score_reshaped', '_kps_score_old_shape'],\n        shape=(-1, cfg.KRCNN.HEATMAP_SIZE * cfg.KRCNN.HEATMAP_SIZE)\n    )\n    # Softmax across **space** (woahh....space!)\n    # Note: this is not what is commonly called \"spatial softmax\"\n    # (i.e., softmax applied along the channel dimension at each spatial\n    # location); This is softmax applied over a set of spatial locations (i.e.,\n    # each spatial location is a \"class\").\n    kps_prob, loss_kps = model.net.SoftmaxWithLoss(\n        ['kps_score_reshaped', 'keypoint_locations_int32', 'keypoint_weights'],\n        ['kps_prob', 'loss_kps'],\n        scale=cfg.KRCNN.LOSS_WEIGHT / cfg.NUM_GPUS,\n        spatial=0\n    )\n    if not cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:\n        # Discussion: the softmax loss above will average the loss by the sum of\n        # keypoint_weights, i.e. the total number of visible keypoints. 
Since\n        # the number of visible keypoints can vary significantly between\n        # minibatches, this has the effect of up-weighting the importance of\n        # minibatches with few visible keypoints. (Imagine the extreme case of\n        # only one visible keypoint versus N: in the case of N, each one\n        # contributes 1/N to the gradient compared to the single keypoint\n        # determining the gradient direction). Instead, we can normalize the\n        # loss by the total number of keypoints, if it were the case that all\n        # keypoints were visible in a full minibatch. (Returning to the example,\n        # this means that the one visible keypoint contributes as much as each\n        # of the N keypoints.)\n        model.StopGradient(\n            'keypoint_loss_normalizer', 'keypoint_loss_normalizer'\n        )\n        loss_kps = model.net.Mul(\n            ['loss_kps', 'keypoint_loss_normalizer'], 'loss_kps_normalized'\n        )\n    loss_gradients = blob_utils.get_loss_gradients(model, [loss_kps])\n    model.AddLosses(loss_kps)\n    return loss_gradients\n\n\n# ---------------------------------------------------------------------------- #\n# Keypoint heads\n# ---------------------------------------------------------------------------- #\n\ndef add_ResNet_roi_conv5_head_for_keypoints(\n    model, blob_in, dim_in, spatial_scale\n):\n    \"\"\"Add a ResNet \"conv5\" / \"stage5\" head for Mask R-CNN keypoint prediction.\n    \"\"\"\n    model.RoIFeatureTransform(\n        blob_in,\n        '_[pose]_pool5',\n        blob_rois='keypoint_rois',\n        method=cfg.KRCNN.ROI_XFORM_METHOD,\n        resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION,\n        sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n    # Using the prefix '_[pose]_' to 'res5' enables initializing the head's\n    # parameters using pretrained 'res5' parameters if given (see\n    # utils.net.initialize_from_weights_file)\n    s, dim_in = 
ResNet.add_stage(\n        model,\n        '_[pose]_res5',\n        '_[pose]_pool5',\n        3,\n        dim_in,\n        2048,\n        512,\n        cfg.KRCNN.DILATION,\n        stride_init=int(cfg.KRCNN.ROI_XFORM_RESOLUTION / 7)\n    )\n    return s, 2048\n\n\ndef add_roi_pose_head_v1convX(model, blob_in, dim_in, spatial_scale):\n    \"\"\"Add a Mask R-CNN keypoint head. v1convX design: X * (conv).\"\"\"\n    hidden_dim = cfg.KRCNN.CONV_HEAD_DIM\n    kernel_size = cfg.KRCNN.CONV_HEAD_KERNEL\n    pad_size = kernel_size // 2\n    current = model.RoIFeatureTransform(\n        blob_in,\n        '_[pose]_roi_feat',\n        blob_rois='keypoint_rois',\n        method=cfg.KRCNN.ROI_XFORM_METHOD,\n        resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION,\n        sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n\n    for i in range(cfg.KRCNN.NUM_STACKED_CONVS):\n        current = model.Conv(\n            current,\n            'conv_fcn' + str(i + 1),\n            dim_in,\n            hidden_dim,\n            kernel_size,\n            stride=1,\n            pad=pad_size,\n            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.01}),\n            bias_init=('ConstantFill', {'value': 0.})\n        )\n        current = model.Relu(current, current)\n        dim_in = hidden_dim\n\n    return current, hidden_dim\n"
  },
  {
    "path": "detectron/modeling/mask_rcnn_heads.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Various network \"heads\" for predicting masks in Mask R-CNN.\n\nThe design is as follows:\n\n... -> RoI ----\\\n                -> RoIFeatureXform -> mask head -> mask output -> loss\n... -> Feature /\n       Map\n\nThe mask head produces a feature representation of the RoI for the purpose\nof mask prediction. 
The mask output module converts the feature representation\ninto real-valued (soft) masks.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.c2 import const_fill\nfrom detectron.utils.c2 import gauss_fill\nfrom detectron.utils.net import get_group_gn\nimport detectron.modeling.ResNet as ResNet\nimport detectron.utils.blob as blob_utils\n\n\n# ---------------------------------------------------------------------------- #\n# Mask R-CNN outputs and losses\n# ---------------------------------------------------------------------------- #\n\ndef add_mask_rcnn_outputs(model, blob_in, dim):\n    \"\"\"Add Mask R-CNN specific outputs: either mask logits or probs.\"\"\"\n    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1\n\n    if cfg.MRCNN.USE_FC_OUTPUT:\n        # Predict masks with a fully connected layer (ignore 'fcn' in the blob\n        # name)\n        dim_fc = int(dim * (cfg.MRCNN.RESOLUTION / cfg.MRCNN.UPSAMPLE_RATIO)**2)\n        blob_out = model.FC(\n            blob_in,\n            'mask_fcn_logits',\n            dim_fc,\n            num_cls * cfg.MRCNN.RESOLUTION**2,\n            weight_init=gauss_fill(0.001),\n            bias_init=const_fill(0.0)\n        )\n    else:\n        # Predict mask using Conv\n\n        # Use GaussianFill for class-agnostic mask prediction; fills based on\n        # fan-in can be too large in this case and cause divergence\n        fill = (\n            cfg.MRCNN.CONV_INIT\n            if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill'\n        )\n        blob_out = model.Conv(\n            blob_in,\n            'mask_fcn_logits',\n            dim,\n            num_cls,\n            kernel=1,\n            pad=0,\n            stride=1,\n            weight_init=(fill, {'std': 0.001}),\n            bias_init=const_fill(0.0)\n        )\n\n   
     if cfg.MRCNN.UPSAMPLE_RATIO > 1:\n            blob_out = model.BilinearInterpolation(\n                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,\n                cfg.MRCNN.UPSAMPLE_RATIO\n            )\n\n    if not model.train:  # == if test\n        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')\n\n    return blob_out\n\n\ndef add_mask_rcnn_losses(model, blob_mask):\n    \"\"\"Add Mask R-CNN specific losses.\"\"\"\n    loss_mask = model.net.SigmoidCrossEntropyLoss(\n        [blob_mask, 'masks_int32'],\n        'loss_mask',\n        scale=model.GetLossScale() * cfg.MRCNN.WEIGHT_LOSS_MASK\n    )\n    loss_gradients = blob_utils.get_loss_gradients(model, [loss_mask])\n    model.AddLosses('loss_mask')\n    return loss_gradients\n\n\n# ---------------------------------------------------------------------------- #\n# Mask heads\n# ---------------------------------------------------------------------------- #\n\ndef mask_rcnn_fcn_head_v1up4convs(model, blob_in, dim_in, spatial_scale):\n    \"\"\"v1up design: 4 * (conv 3x3), convT 2x2.\"\"\"\n    return mask_rcnn_fcn_head_v1upXconvs(\n        model, blob_in, dim_in, spatial_scale, 4\n    )\n\n\ndef mask_rcnn_fcn_head_v1up4convs_gn(model, blob_in, dim_in, spatial_scale):\n    \"\"\"v1up design: 4 * (conv 3x3), convT 2x2, with GroupNorm\"\"\"\n    return mask_rcnn_fcn_head_v1upXconvs_gn(\n        model, blob_in, dim_in, spatial_scale, 4\n    )\n\n\ndef mask_rcnn_fcn_head_v1up(model, blob_in, dim_in, spatial_scale):\n    \"\"\"v1up design: 2 * (conv 3x3), convT 2x2.\"\"\"\n    return mask_rcnn_fcn_head_v1upXconvs(\n        model, blob_in, dim_in, spatial_scale, 2\n    )\n\n\ndef mask_rcnn_fcn_head_v1upXconvs(\n    model, blob_in, dim_in, spatial_scale, num_convs\n):\n    \"\"\"v1upXconvs design: X * (conv 3x3), convT 2x2.\"\"\"\n    current = model.RoIFeatureTransform(\n        blob_in,\n        blob_out='_[mask]_roi_feat',\n        blob_rois='mask_rois',\n        
method=cfg.MRCNN.ROI_XFORM_METHOD,\n        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,\n        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n\n    dilation = cfg.MRCNN.DILATION\n    dim_inner = cfg.MRCNN.DIM_REDUCED\n\n    for i in range(num_convs):\n        current = model.Conv(\n            current,\n            '_[mask]_fcn' + str(i + 1),\n            dim_in,\n            dim_inner,\n            kernel=3,\n            dilation=dilation,\n            pad=1 * dilation,\n            stride=1,\n            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),\n            bias_init=('ConstantFill', {'value': 0.})\n        )\n        current = model.Relu(current, current)\n        dim_in = dim_inner\n\n    # upsample layer\n    model.ConvTranspose(\n        current,\n        'conv5_mask',\n        dim_inner,\n        dim_inner,\n        kernel=2,\n        pad=0,\n        stride=2,\n        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),\n        bias_init=const_fill(0.0)\n    )\n    blob_mask = model.Relu('conv5_mask', 'conv5_mask')\n\n    return blob_mask, dim_inner\n\n\ndef mask_rcnn_fcn_head_v1upXconvs_gn(\n    model, blob_in, dim_in, spatial_scale, num_convs\n):\n    \"\"\"v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm\"\"\"\n    current = model.RoIFeatureTransform(\n        blob_in,\n        blob_out='_mask_roi_feat',\n        blob_rois='mask_rois',\n        method=cfg.MRCNN.ROI_XFORM_METHOD,\n        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,\n        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n\n    dilation = cfg.MRCNN.DILATION\n    dim_inner = cfg.MRCNN.DIM_REDUCED\n\n    for i in range(num_convs):\n        current = model.ConvGN(\n            current,\n            '_mask_fcn' + str(i + 1),\n            dim_in,\n            dim_inner,\n            group_gn=get_group_gn(dim_inner),\n            kernel=3,\n            pad=1 * dilation,\n         
   stride=1,\n            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),\n            bias_init=('ConstantFill', {'value': 0.})\n        )\n        current = model.Relu(current, current)\n        dim_in = dim_inner\n\n    # upsample layer\n    model.ConvTranspose(\n        current,\n        'conv5_mask',\n        dim_inner,\n        dim_inner,\n        kernel=2,\n        pad=0,\n        stride=2,\n        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),\n        bias_init=const_fill(0.0)\n    )\n    blob_mask = model.Relu('conv5_mask', 'conv5_mask')\n\n    return blob_mask, dim_inner\n\n\ndef mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):\n    \"\"\"Use a ResNet \"conv5\" / \"stage5\" head for mask prediction. Weights and\n    computation are shared with the conv5 box head. Computation can only be\n    shared during training, since inference is cascaded.\n\n    v0upshare design: conv5, convT 2x2.\n    \"\"\"\n    # Since box and mask head are shared, these must match\n    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION\n\n    if model.train:  # share computation with bbox head at training time\n        dim_conv5 = 2048\n        blob_conv5 = model.net.SampleAs(\n            ['res5_2_sum', 'roi_has_mask_int32'],\n            ['_[mask]_res5_2_sum_sliced']\n        )\n    else:  # re-compute at test time\n        blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(\n            model,\n            blob_in,\n            dim_in,\n            spatial_scale\n        )\n\n    dim_reduced = cfg.MRCNN.DIM_REDUCED\n\n    blob_mask = model.ConvTranspose(\n        blob_conv5,\n        'conv5_mask',\n        dim_conv5,\n        dim_reduced,\n        kernel=2,\n        pad=0,\n        stride=2,\n        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),  # std only for gauss\n        bias_init=const_fill(0.0)\n    )\n    model.Relu('conv5_mask', 'conv5_mask')\n\n    return blob_mask, dim_reduced\n\n\ndef 
mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale):\n    \"\"\"v0up design: conv5, deconv 2x2 (no weight sharing with the box head).\"\"\"\n    blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(\n        model,\n        blob_in,\n        dim_in,\n        spatial_scale\n    )\n\n    dim_reduced = cfg.MRCNN.DIM_REDUCED\n\n    model.ConvTranspose(\n        blob_conv5,\n        'conv5_mask',\n        dim_conv5,\n        dim_reduced,\n        kernel=2,\n        pad=0,\n        stride=2,\n        weight_init=('GaussianFill', {'std': 0.001}),\n        bias_init=const_fill(0.0)\n    )\n    blob_mask = model.Relu('conv5_mask', 'conv5_mask')\n\n    return blob_mask, dim_reduced\n\n\ndef add_ResNet_roi_conv5_head_for_masks(model, blob_in, dim_in, spatial_scale):\n    \"\"\"Add a ResNet \"conv5\" / \"stage5\" head for predicting masks.\"\"\"\n    model.RoIFeatureTransform(\n        blob_in,\n        blob_out='_[mask]_pool5',\n        blob_rois='mask_rois',\n        method=cfg.MRCNN.ROI_XFORM_METHOD,\n        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,\n        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,\n        spatial_scale=spatial_scale\n    )\n\n    dilation = cfg.MRCNN.DILATION\n    stride_init = int(cfg.MRCNN.ROI_XFORM_RESOLUTION / 7)  # by default: 2\n\n    s, dim_in = ResNet.add_stage(\n        model,\n        '_[mask]_res5',\n        '_[mask]_pool5',\n        3,\n        dim_in,\n        2048,\n        512,\n        dilation,\n        stride_init=stride_init\n    )\n\n    return s, 2048\n"
  },
  {
    "path": "detectron/modeling/model_builder.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Detectron model construction functions.\n\nDetectron supports a large number of model types. The configuration space is\nlarge. To get a sense, a given model is in element in the cartesian product of:\n\n  - backbone (e.g., VGG16, ResNet, ResNeXt)\n  - FPN (on or off)\n  - RPN only (just proposals)\n  - Fixed proposals for Fast R-CNN, RFCN, Mask R-CNN (with or without keypoints)\n  - End-to-end model with RPN + Fast R-CNN (i.e., Faster R-CNN), Mask R-CNN, ...\n  - Different \"head\" choices for the model\n  - ... many configuration options ...\n\nA given model is made by combining many basic components. 
The result is flexible\nthough somewhat complex to understand at first.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport copy\nimport importlib\nimport logging\n\nfrom caffe2.python import core\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import cfg\nfrom detectron.modeling.detector import DetectionModelHelper\nfrom detectron.roi_data.loader import RoIDataLoader\nimport detectron.modeling.fast_rcnn_heads as fast_rcnn_heads\nimport detectron.modeling.keypoint_rcnn_heads as keypoint_rcnn_heads\nimport detectron.modeling.mask_rcnn_heads as mask_rcnn_heads\nimport detectron.modeling.name_compat as name_compat\nimport detectron.modeling.optimizer as optim\nimport detectron.modeling.retinanet_heads as retinanet_heads\nimport detectron.modeling.rfcn_heads as rfcn_heads\nimport detectron.modeling.rpn_heads as rpn_heads\nimport detectron.roi_data.minibatch as roi_data_minibatch\nimport detectron.utils.c2 as c2_utils\n\nlogger = logging.getLogger(__name__)\n\n\n# ---------------------------------------------------------------------------- #\n# Generic recomposable model builders\n#\n# For example, you can create a Fast R-CNN model with the ResNet-50-C4 backbone\n# with the configuration:\n#\n# MODEL:\n#   TYPE: generalized_rcnn\n#   CONV_BODY: ResNet.add_ResNet50_conv4_body\n#   ROI_HEAD: ResNet.add_ResNet_roi_conv5_head\n# ---------------------------------------------------------------------------- #\n\ndef generalized_rcnn(model):\n    \"\"\"This model type handles:\n      - Fast R-CNN\n      - RPN only (not integrated with Fast R-CNN)\n      - Faster R-CNN (stagewise training from NIPS paper)\n      - Faster R-CNN (end-to-end joint training)\n      - Mask R-CNN (stagewise training from NIPS paper)\n      - Mask R-CNN (end-to-end joint training)\n    \"\"\"\n    return build_generic_detection_model(\n        model,\n        
get_func(cfg.MODEL.CONV_BODY),\n        add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD),\n        add_roi_mask_head_func=get_func(cfg.MRCNN.ROI_MASK_HEAD),\n        add_roi_keypoint_head_func=get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD),\n        freeze_conv_body=cfg.TRAIN.FREEZE_CONV_BODY\n    )\n\n\ndef rfcn(model):\n    # TODO(rbg): fold into build_generic_detection_model\n    return build_generic_rfcn_model(model, get_func(cfg.MODEL.CONV_BODY))\n\n\ndef retinanet(model):\n    # TODO(rbg): fold into build_generic_detection_model\n    return build_generic_retinanet_model(model, get_func(cfg.MODEL.CONV_BODY))\n\n\n# ---------------------------------------------------------------------------- #\n# Helper functions for building various re-usable network bits\n# ---------------------------------------------------------------------------- #\n\ndef create(model_type_func, train=False, gpu_id=0):\n    \"\"\"Generic model creation function that dispatches to specific model\n    building functions.\n\n    By default, this function will generate a data parallel model configured to\n    run on cfg.NUM_GPUS devices. However, you can restrict it to build a model\n    targeted to a specific GPU by specifying gpu_id. This is used by\n    optimizer.build_data_parallel_model() during test time.\n    \"\"\"\n    model = DetectionModelHelper(\n        name=model_type_func,\n        train=train,\n        num_classes=cfg.MODEL.NUM_CLASSES,\n        init_params=train\n    )\n    model.only_build_forward_pass = False\n    model.target_gpu_id = gpu_id\n    return get_func(model_type_func)(model)\n\n\ndef get_func(func_name):\n    \"\"\"Helper to return a function object by name. 
func_name must identify a\n    function in this module or the path to a function relative to the base\n    'modeling' module.\n    \"\"\"\n    if func_name == '':\n        return None\n    new_func_name = name_compat.get_new_name(func_name)\n    if new_func_name != func_name:\n        logger.warn(\n            'Remapping old function name: {} -> {}'.\n            format(func_name, new_func_name)\n        )\n        func_name = new_func_name\n    try:\n        parts = func_name.split('.')\n        # Refers to a function in this module\n        if len(parts) == 1:\n            return globals()[parts[0]]\n        # Otherwise, assume we're referencing a module under modeling\n        module_name = 'detectron.modeling.' + '.'.join(parts[:-1])\n        module = importlib.import_module(module_name)\n        return getattr(module, parts[-1])\n    except Exception:\n        logger.error('Failed to find function: {}'.format(func_name))\n        raise\n\n\ndef build_generic_detection_model(\n    model,\n    add_conv_body_func,\n    add_roi_box_head_func=None,\n    add_roi_mask_head_func=None,\n    add_roi_keypoint_head_func=None,\n    freeze_conv_body=False\n):\n    def _single_gpu_build_func(model):\n        \"\"\"Build the model on a single GPU. 
Can be called in a loop over GPUs\n        with name and device scoping to create a data parallel model.\n        \"\"\"\n        # Add the conv body (called \"backbone architecture\" in papers)\n        # E.g., ResNet-50, ResNet-50-FPN, ResNeXt-101-FPN, etc.\n        blob_conv, dim_conv, spatial_scale_conv = add_conv_body_func(model)\n        if freeze_conv_body:\n            for b in c2_utils.BlobReferenceList(blob_conv):\n                model.StopGradient(b, b)\n\n        if not model.train:  # == inference\n            # Create a net that can be used to execute the conv body on an image\n            # (without also executing RPN or any other network heads)\n            model.conv_body_net = model.net.Clone('conv_body_net')\n\n        head_loss_gradients = {\n            'rpn': None,\n            'box': None,\n            'mask': None,\n            'keypoints': None,\n        }\n\n        if cfg.RPN.RPN_ON:\n            # Add the RPN head\n            head_loss_gradients['rpn'] = rpn_heads.add_generic_rpn_outputs(\n                model, blob_conv, dim_conv, spatial_scale_conv\n            )\n\n        if cfg.FPN.FPN_ON:\n            # After adding the RPN head, restrict FPN blobs and scales to\n            # those used in the RoI heads\n            blob_conv, spatial_scale_conv = _narrow_to_fpn_roi_levels(\n                blob_conv, spatial_scale_conv\n            )\n\n        if not cfg.MODEL.RPN_ONLY:\n            # Add the Fast R-CNN head\n            head_loss_gradients['box'] = _add_fast_rcnn_head(\n                model, add_roi_box_head_func, blob_conv, dim_conv,\n                spatial_scale_conv\n            )\n\n        if cfg.MODEL.MASK_ON:\n            # Add the mask head\n            head_loss_gradients['mask'] = _add_roi_mask_head(\n                model, add_roi_mask_head_func, blob_conv, dim_conv,\n                spatial_scale_conv\n            )\n\n        if cfg.MODEL.KEYPOINTS_ON:\n            # Add the keypoint head\n            
head_loss_gradients['keypoint'] = _add_roi_keypoint_head(\n                model, add_roi_keypoint_head_func, blob_conv, dim_conv,\n                spatial_scale_conv\n            )\n\n        if model.train:\n            loss_gradients = {}\n            for lg in head_loss_gradients.values():\n                if lg is not None:\n                    loss_gradients.update(lg)\n            return loss_gradients\n        else:\n            return None\n\n    optim.build_data_parallel_model(model, _single_gpu_build_func)\n    return model\n\n\ndef _narrow_to_fpn_roi_levels(blobs, spatial_scales):\n    \"\"\"Return only the blobs and spatial scales that will be used for RoI heads.\n    Inputs `blobs` and `spatial_scales` may include extra blobs and scales that\n    are used for RPN proposals, but not for RoI heads.\n    \"\"\"\n    # Code only supports case when RPN and ROI min levels are the same\n    assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL\n    # RPN max level can be >= to ROI max level\n    assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL\n    # FPN RPN max level might be > FPN ROI max level in which case we\n    # need to discard some leading conv blobs (blobs are ordered from\n    # max/coarsest level to min/finest level)\n    num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1\n    return blobs[-num_roi_levels:], spatial_scales[-num_roi_levels:]\n\n\ndef _add_fast_rcnn_head(\n    model, add_roi_box_head_func, blob_in, dim_in, spatial_scale_in\n):\n    \"\"\"Add a Fast R-CNN head to the model.\"\"\"\n    blob_frcn, dim_frcn = add_roi_box_head_func(\n        model, blob_in, dim_in, spatial_scale_in\n    )\n    fast_rcnn_heads.add_fast_rcnn_outputs(model, blob_frcn, dim_frcn)\n    if model.train:\n        loss_gradients = fast_rcnn_heads.add_fast_rcnn_losses(model)\n    else:\n        loss_gradients = None\n    return loss_gradients\n\n\ndef _add_roi_mask_head(\n    model, add_roi_mask_head_func, blob_in, dim_in, 
spatial_scale_in\n):\n    \"\"\"Add a mask prediction head to the model.\"\"\"\n    # Capture model graph before adding the mask head\n    bbox_net = copy.deepcopy(model.net.Proto())\n    # Add the mask head\n    blob_mask_head, dim_mask_head = add_roi_mask_head_func(\n        model, blob_in, dim_in, spatial_scale_in\n    )\n    # Add the mask output\n    blob_mask = mask_rcnn_heads.add_mask_rcnn_outputs(\n        model, blob_mask_head, dim_mask_head\n    )\n\n    if not model.train:  # == inference\n        # Inference uses a cascade of box predictions, then mask predictions.\n        # This requires separate nets for box and mask prediction.\n        # So we extract the mask prediction net, store it as its own network,\n        # then restore model.net to be the bbox-only network\n        model.mask_net, blob_mask = c2_utils.SuffixNet(\n            'mask_net', model.net, len(bbox_net.op), blob_mask\n        )\n        model.net._net = bbox_net\n        loss_gradients = None\n    else:\n        loss_gradients = mask_rcnn_heads.add_mask_rcnn_losses(model, blob_mask)\n    return loss_gradients\n\n\ndef _add_roi_keypoint_head(\n    model, add_roi_keypoint_head_func, blob_in, dim_in, spatial_scale_in\n):\n    \"\"\"Add a keypoint prediction head to the model.\"\"\"\n    # Capture model graph before adding the mask head\n    bbox_net = copy.deepcopy(model.net.Proto())\n    # Add the keypoint head\n    blob_keypoint_head, dim_keypoint_head = add_roi_keypoint_head_func(\n        model, blob_in, dim_in, spatial_scale_in\n    )\n    # Add the keypoint output\n    blob_keypoint = keypoint_rcnn_heads.add_keypoint_outputs(\n        model, blob_keypoint_head, dim_keypoint_head\n    )\n\n    if not model.train:  # == inference\n        # Inference uses a cascade of box predictions, then keypoint predictions\n        # This requires separate nets for box and keypoint prediction.\n        # So we extract the keypoint prediction net, store it as its own\n        # network, then 
restore model.net to be the bbox-only network\n        model.keypoint_net, keypoint_blob_out = c2_utils.SuffixNet(\n            'keypoint_net', model.net, len(bbox_net.op), blob_keypoint\n        )\n        model.net._net = bbox_net\n        loss_gradients = None\n    else:\n        loss_gradients = keypoint_rcnn_heads.add_keypoint_losses(model)\n    return loss_gradients\n\n\ndef build_generic_rfcn_model(model, add_conv_body_func, dim_reduce=None):\n    # TODO(rbg): fold this function into build_generic_detection_model\n    def _single_gpu_build_func(model):\n        \"\"\"Builds the model on a single GPU. Can be called in a loop over GPUs\n        with name and device scoping to create a data parallel model.\"\"\"\n        blob, dim, spatial_scale = add_conv_body_func(model)\n        if not model.train:\n            model.conv_body_net = model.net.Clone('conv_body_net')\n        rfcn_heads.add_rfcn_outputs(model, blob, dim, dim_reduce, spatial_scale)\n        if model.train:\n            loss_gradients = fast_rcnn_heads.add_fast_rcnn_losses(model)\n        return loss_gradients if model.train else None\n\n    optim.build_data_parallel_model(model, _single_gpu_build_func)\n    return model\n\n\ndef build_generic_retinanet_model(\n    model, add_conv_body_func, freeze_conv_body=False\n):\n    # TODO(rbg): fold this function into build_generic_detection_model\n    def _single_gpu_build_func(model):\n        \"\"\"Builds the model on a single GPU. 
Can be called in a loop over GPUs\n        with name and device scoping to create a data parallel model.\"\"\"\n        blobs, dim, spatial_scales = add_conv_body_func(model)\n        if not model.train:\n            model.conv_body_net = model.net.Clone('conv_body_net')\n        retinanet_heads.add_fpn_retinanet_outputs(\n            model, blobs, dim, spatial_scales\n        )\n        if model.train:\n            loss_gradients = retinanet_heads.add_fpn_retinanet_losses(\n                model\n            )\n        return loss_gradients if model.train else None\n\n    optim.build_data_parallel_model(model, _single_gpu_build_func)\n    return model\n\n\n# ---------------------------------------------------------------------------- #\n# Network inputs\n# ---------------------------------------------------------------------------- #\n\ndef add_training_inputs(model, roidb=None):\n    \"\"\"Create network input ops and blobs used for training. To be called\n    *after* model_builder.create().\n    \"\"\"\n    # Implementation notes:\n    #   Typically, one would create the input ops and then the rest of the net.\n    #   However, creating the input ops depends on loading the dataset, which\n    #   can take a few minutes for COCO.\n    #   We prefer to avoid waiting so debugging can fail fast.\n    #   Thus, we create the net *without input ops* prior to loading the\n    #   dataset, and then add the input ops after loading the dataset.\n    #   Since we defer input op creation, we need to do a little bit of surgery\n    #   to place the input ops at the start of the network op list.\n    assert model.train, 'Training inputs can only be added to a trainable model'\n    if roidb is not None:\n        # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1\n        model.roi_data_loader = RoIDataLoader(\n            roidb,\n            num_loaders=cfg.DATA_LOADER.NUM_THREADS,\n            minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,\n    
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY\n        )\n    orig_num_op = len(model.net._net.op)\n    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)\n    for gpu_id in range(cfg.NUM_GPUS):\n        with c2_utils.NamedCudaScope(gpu_id):\n            for blob_name in blob_names:\n                workspace.CreateBlob(core.ScopedName(blob_name))\n            model.net.DequeueBlobs(\n                model.roi_data_loader._blobs_queue_name, blob_names\n            )\n    # A little op surgery to move input ops to the start of the net\n    diff = len(model.net._net.op) - orig_num_op\n    new_op = model.net._net.op[-diff:] + model.net._net.op[:-diff]\n    del model.net._net.op[:]\n    model.net._net.op.extend(new_op)\n\n\ndef add_inference_inputs(model):\n    \"\"\"Create network input blobs used for inference.\"\"\"\n\n    def create_input_blobs_for_net(net_def):\n        for op in net_def.op:\n            for blob_in in op.input:\n                if not workspace.HasBlob(blob_in):\n                    workspace.CreateBlob(blob_in)\n\n    create_input_blobs_for_net(model.net.Proto())\n    if cfg.MODEL.MASK_ON:\n        create_input_blobs_for_net(model.mask_net.Proto())\n    if cfg.MODEL.KEYPOINTS_ON:\n        create_input_blobs_for_net(model.keypoint_net.Proto())\n\n\n# ---------------------------------------------------------------------------- #\n# ********************** DEPRECATED FUNCTIONALITY BELOW ********************** #\n# ---------------------------------------------------------------------------- #\n\n# ---------------------------------------------------------------------------- #\n# Hardcoded functions to create various types of common models\n#\n#            *** This type of model definition is deprecated ***\n#            *** Use the generic composable versions instead ***\n#\n# ---------------------------------------------------------------------------- #\n\nimport detectron.modeling.ResNet as ResNet\nimport 
detectron.modeling.VGG16 as VGG16\nimport detectron.modeling.VGG_CNN_M_1024 as VGG_CNN_M_1024\n\n\ndef fast_rcnn(model):\n    logger.warn('Deprecated: use `MODEL.TYPE: generalized_rcnn`.')\n    return generalized_rcnn(model)\n\n\ndef mask_rcnn(model):\n    logger.warn(\n        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '\n        '`MODEL.MASK_ON: True`'\n    )\n    return generalized_rcnn(model)\n\n\ndef keypoint_rcnn(model):\n    logger.warn(\n        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '\n        '`MODEL.KEYPOINTS_ON: True`'\n    )\n    return generalized_rcnn(model)\n\n\ndef mask_and_keypoint_rcnn(model):\n    logger.warn(\n        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '\n        '`MODEL.MASK_ON: True and ``MODEL.KEYPOINTS_ON: True`'\n    )\n    return generalized_rcnn(model)\n\n\ndef rpn(model):\n    logger.warn(\n        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '\n        '`MODEL.RPN_ONLY: True`'\n    )\n    return generalized_rcnn(model)\n\n\ndef fpn_rpn(model):\n    logger.warn(\n        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '\n        '`MODEL.RPN_ONLY: True` and FPN enabled via configs'\n    )\n    return generalized_rcnn(model)\n\n\ndef faster_rcnn(model):\n    logger.warn(\n        'Deprecated: use `MODEL.TYPE: generalized_rcnn` with '\n        '`MODEL.FASTER_RCNN: True`'\n    )\n    return generalized_rcnn(model)\n\n\ndef fast_rcnn_frozen_features(model):\n    logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')\n    return build_generic_detection_model(\n        model,\n        get_func(cfg.MODEL.CONV_BODY),\n        add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD),\n        freeze_conv_body=True\n    )\n\n\ndef rpn_frozen_features(model):\n    logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')\n    return build_generic_detection_model(\n        model, get_func(cfg.MODEL.CONV_BODY), freeze_conv_body=True\n    )\n\n\ndef 
fpn_rpn_frozen_features(model):\n    logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')\n    return build_generic_detection_model(\n        model, get_func(cfg.MODEL.CONV_BODY), freeze_conv_body=True\n    )\n\n\ndef mask_rcnn_frozen_features(model):\n    logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')\n    return build_generic_detection_model(\n        model,\n        get_func(cfg.MODEL.CONV_BODY),\n        add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD),\n        add_roi_mask_head_func=get_func(cfg.MRCNN.ROI_MASK_HEAD),\n        freeze_conv_body=True\n    )\n\n\ndef keypoint_rcnn_frozen_features(model):\n    logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead')\n    return build_generic_detection_model(\n        model,\n        get_func(cfg.MODEL.CONV_BODY),\n        add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD),\n        add_roi_keypoint_head_func=get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD),\n        freeze_conv_body=True\n    )\n\n\n# ---------------------------------------------------------------------------- #\n# Fast R-CNN models\n# ---------------------------------------------------------------------------- #\n\n\ndef VGG_CNN_M_1024_fast_rcnn(model):\n    return build_generic_detection_model(\n        model, VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body,\n        VGG_CNN_M_1024.add_VGG_CNN_M_1024_roi_fc_head\n    )\n\n\ndef VGG16_fast_rcnn(model):\n    return build_generic_detection_model(\n        model, VGG16.add_VGG16_conv5_body, VGG16.add_VGG16_roi_fc_head\n    )\n\n\ndef ResNet50_fast_rcnn(model):\n    return build_generic_detection_model(\n        model, ResNet.add_ResNet50_conv4_body, ResNet.add_ResNet_roi_conv5_head\n    )\n\n\ndef ResNet101_fast_rcnn(model):\n    return build_generic_detection_model(\n        model, ResNet.add_ResNet101_conv4_body, ResNet.add_ResNet_roi_conv5_head\n    )\n\n\ndef ResNet50_fast_rcnn_frozen_features(model):\n    return 
build_generic_detection_model(\n        model,\n        ResNet.add_ResNet50_conv4_body,\n        ResNet.add_ResNet_roi_conv5_head,\n        freeze_conv_body=True\n    )\n\n\ndef ResNet101_fast_rcnn_frozen_features(model):\n    return build_generic_detection_model(\n        model,\n        ResNet.add_ResNet101_conv4_body,\n        ResNet.add_ResNet_roi_conv5_head,\n        freeze_conv_body=True\n    )\n\n\n# ---------------------------------------------------------------------------- #\n# RPN-only models\n# ---------------------------------------------------------------------------- #\n\n\ndef VGG_CNN_M_1024_rpn(model):\n    return build_generic_detection_model(\n        model, VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body\n    )\n\n\ndef VGG16_rpn(model):\n    return build_generic_detection_model(model, VGG16.add_VGG16_conv5_body)\n\n\ndef ResNet50_rpn_conv4(model):\n    return build_generic_detection_model(model, ResNet.add_ResNet50_conv4_body)\n\n\ndef ResNet101_rpn_conv4(model):\n    return build_generic_detection_model(model, ResNet.add_ResNet101_conv4_body)\n\n\ndef VGG_CNN_M_1024_rpn_frozen_features(model):\n    return build_generic_detection_model(\n        model,\n        VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body,\n        freeze_conv_body=True\n    )\n\n\ndef VGG16_rpn_frozen_features(model):\n    return build_generic_detection_model(\n        model, VGG16.add_VGG16_conv5_body, freeze_conv_body=True\n    )\n\n\ndef ResNet50_rpn_conv4_frozen_features(model):\n    return build_generic_detection_model(\n        model, ResNet.add_ResNet50_conv4_body, freeze_conv_body=True\n    )\n\n\ndef ResNet101_rpn_conv4_frozen_features(model):\n    return build_generic_detection_model(\n        model, ResNet.add_ResNet101_conv4_body, freeze_conv_body=True\n    )\n\n\n# ---------------------------------------------------------------------------- #\n# Faster R-CNN models\n# ---------------------------------------------------------------------------- #\n\n\ndef 
VGG16_faster_rcnn(model):\n    assert cfg.MODEL.FASTER_RCNN\n    return build_generic_detection_model(\n        model, VGG16.add_VGG16_conv5_body, VGG16.add_VGG16_roi_fc_head\n    )\n\n\ndef ResNet50_faster_rcnn(model):\n    assert cfg.MODEL.FASTER_RCNN\n    return build_generic_detection_model(\n        model, ResNet.add_ResNet50_conv4_body, ResNet.add_ResNet_roi_conv5_head\n    )\n\n\ndef ResNet101_faster_rcnn(model):\n    assert cfg.MODEL.FASTER_RCNN\n    return build_generic_detection_model(\n        model, ResNet.add_ResNet101_conv4_body, ResNet.add_ResNet_roi_conv5_head\n    )\n\n\n# ---------------------------------------------------------------------------- #\n# R-FCN models\n# ---------------------------------------------------------------------------- #\n\n\ndef ResNet50_rfcn(model):\n    return build_generic_rfcn_model(\n        model, ResNet.add_ResNet50_conv5_body, dim_reduce=1024\n    )\n\n\ndef ResNet101_rfcn(model):\n    return build_generic_rfcn_model(\n        model, ResNet.add_ResNet101_conv5_body, dim_reduce=1024\n    )\n"
  },
  {
    "path": "detectron/modeling/name_compat.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Handle mapping from old network building function names to new names.\n\nFlexible network configuration is achieved by specifying the function name that\nbuilds a network module (e.g., the name of the conv backbone or the mask roi\nhead). However we may wish to change names over time without breaking previous\nconfig files. This module provides backwards naming compatibility by providing\na mapping from the old name to the new name.\n\nWhen renaming functions, it's generally a good idea to codemod existing yaml\nconfig files. An easy way to batch edit, by example, is a shell command like\n\n$ find . 
-name \"*.yaml\" -exec sed -i -e \\\n   's/head_builder\\.add_roi_2mlp_head/fast_rcnn_heads.add_roi_2mlp_head/g' {} \\;\n\nto perform the renaming:\n  head_builder.add_roi_2mlp_head => fast_rcnn_heads.add_roi_2mlp_head\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n\n_RENAME = {\n    # Removed \"ResNet_\" from the name because it wasn't relevant\n    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs':\n        'mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs',\n    # Removed \"ResNet_\" from the name because it wasn't relevant\n    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up':\n        'mask_rcnn_heads.mask_rcnn_fcn_head_v1up',\n    # Removed \"ResNet_\" from the name because it wasn't relevant\n    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0upshare':\n        'mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare',\n    # Removed \"ResNet_\" from the name because it wasn't relevant\n    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0up':\n        'mask_rcnn_heads.mask_rcnn_fcn_head_v0up',\n    # Removed head_builder module in favor of the more specific fast_rcnn name\n    'head_builder.add_roi_2mlp_head':\n        'fast_rcnn_heads.add_roi_2mlp_head',\n}\n\n\ndef get_new_name(func_name):\n    if func_name in _RENAME:\n        func_name = _RENAME[func_name]\n    return func_name\n"
  },
  {
    "path": "detectron/modeling/optimizer.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Optimization operator graph construction.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport logging\n\nfrom caffe2.python import muji\n\nfrom detectron.core.config import cfg\nimport detectron.utils.c2 as c2_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef build_data_parallel_model(model, single_gpu_build_func):\n    \"\"\"Build a data parallel model given a function that builds the model on a\n    single GPU.\n    \"\"\"\n    if model.only_build_forward_pass:\n        single_gpu_build_func(model)\n    elif model.train:\n        all_loss_gradients = _build_forward_graph(model, single_gpu_build_func)\n        # Add backward pass on all GPUs\n        model.AddGradientOperators(all_loss_gradients)\n        if cfg.NUM_GPUS > 1:\n            _add_allreduce_graph(model)\n        for gpu_id in range(cfg.NUM_GPUS):\n            # After allreduce, all GPUs perform SGD updates on their identical\n            # params and gradients in parallel\n            with c2_utils.NamedCudaScope(gpu_id):\n                add_single_gpu_param_update_ops(model, gpu_id)\n    else:\n        # Test-time network operates on single GPU\n        # Test-time parallelism is 
implemented through multiprocessing\n        with c2_utils.NamedCudaScope(model.target_gpu_id):\n            single_gpu_build_func(model)\n\n\ndef _build_forward_graph(model, single_gpu_build_func):\n    \"\"\"Construct the forward graph on each GPU.\"\"\"\n    all_loss_gradients = {}  # Will include loss gradients from all GPUs\n    # Build the model on each GPU with correct name and device scoping\n    for gpu_id in range(cfg.NUM_GPUS):\n        with c2_utils.NamedCudaScope(gpu_id):\n            all_loss_gradients.update(single_gpu_build_func(model))\n    return all_loss_gradients\n\n\ndef _add_allreduce_graph(model):\n    \"\"\"Construct the graph that performs Allreduce on the gradients.\"\"\"\n    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU\n    all_params = model.TrainableParams()\n    assert len(all_params) % cfg.NUM_GPUS == 0\n    # The model parameters are replicated on each GPU, get the number\n    # distinct parameter blobs (i.e., the number of parameter blobs on\n    # each GPU)\n    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)\n    with c2_utils.CudaScope(0):\n        # Iterate over distinct parameter blobs\n        for i in range(params_per_gpu):\n            # Gradients from all GPUs for this parameter blob\n            gradients = [\n                model.param_to_grad[p] for p in all_params[i::params_per_gpu]\n            ]\n            if len(gradients) > 0:\n                if cfg.USE_NCCL:\n                    model.net.NCCLAllreduce(gradients, gradients)\n                else:\n                    muji.Allreduce(model.net, gradients, reduced_affix='')\n\n\ndef add_single_gpu_param_update_ops(model, gpu_id):\n    # Learning rate of 0 is a dummy value to be set properly at the\n    # start of training\n    lr = model.param_init_net.ConstantFill(\n        [], 'lr', shape=[1], value=0.0\n    )\n    one = model.param_init_net.ConstantFill(\n        [], 'one', shape=[1], value=1.0\n    )\n    wd = 
model.param_init_net.ConstantFill(\n        [], 'wd', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY\n    )\n    # weight decay of GroupNorm's parameters\n    wd_gn = model.param_init_net.ConstantFill(\n        [], 'wd_gn', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY_GN\n    )\n    for param in model.TrainableParams(gpu_id=gpu_id):\n        logger.debug('param ' + str(param) + ' will be updated')\n        param_grad = model.param_to_grad[param]\n        # Initialize momentum vector\n        param_momentum = model.param_init_net.ConstantFill(\n            [param], param + '_momentum', value=0.0\n        )\n        if param in model.biases:\n            # Special treatment for biases (mainly to match historical impl.\n            # details):\n            # (1) Do not apply weight decay\n            # (2) Use a 2x higher learning rate\n            model.Scale(param_grad, param_grad, scale=2.0)\n        elif param in model.gn_params:\n            # Special treatment for GroupNorm's parameters\n            model.WeightedSum([param_grad, one, param, wd_gn], param_grad)\n        elif cfg.SOLVER.WEIGHT_DECAY > 0:\n            # Apply weight decay to non-bias weights\n            model.WeightedSum([param_grad, one, param, wd], param_grad)\n        # Update param_grad and param_momentum in place\n        model.net.MomentumSGDUpdate(\n            [param_grad, param_momentum, lr, param],\n            [param_grad, param_momentum, param],\n            momentum=cfg.SOLVER.MOMENTUM\n        )\n"
  },
  {
    "path": "detectron/modeling/retinanet_heads.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"RetinaNet model heads and losses. See: https://arxiv.org/abs/1708.02002.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nfrom detectron.core.config import cfg\nimport detectron.utils.blob as blob_utils\n\n\ndef get_retinanet_bias_init(model):\n    \"\"\"Initialize the biases for the conv ops that predict class probabilities.\n    Initialization is performed such that at the start of training, all\n    locations are predicted to be background with high probability\n    (e.g., ~0.99 = 1 - cfg.RETINANET.PRIOR_PROB). 
See the Focal Loss paper for\n    details.\n    \"\"\"\n    prior_prob = cfg.RETINANET.PRIOR_PROB\n    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE\n    aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)\n    if cfg.RETINANET.SOFTMAX:\n        # Multiclass softmax case\n        bias = np.zeros((model.num_classes, 1), dtype=np.float32)\n        bias[0] = np.log(\n            (model.num_classes - 1) * (1 - prior_prob) / (prior_prob)\n        )\n        bias = np.vstack(\n            [bias for _ in range(scales_per_octave * aspect_ratios)]\n        )\n        bias_init = (\n            'GivenTensorFill', {\n                'values': bias.astype(dtype=np.float32)\n            }\n        )\n    else:\n        # Per-class sigmoid (binary classification) case\n        bias_init = (\n            'ConstantFill', {\n                'value': -np.log((1 - prior_prob) / prior_prob)\n            }\n        )\n    return bias_init\n\n\ndef add_fpn_retinanet_outputs(model, blobs_in, dim_in, spatial_scales):\n    \"\"\"RetinaNet head. For classification and box regression, we can chose to\n    have the same conv tower or a separate tower. \"bl_feat_list\" stores the list\n    of feature blobs for bbox prediction. 
These blobs can be shared cls feature\n    blobs if we share the tower or else are independent blobs.\n    \"\"\"\n    dim_out = dim_in\n    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid\n    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid\n    A = len(cfg.RETINANET.ASPECT_RATIOS) * cfg.RETINANET.SCALES_PER_OCTAVE\n\n    # compute init for bias\n    bias_init = get_retinanet_bias_init(model)\n\n    assert len(blobs_in) == k_max - k_min + 1\n    bbox_feat_list = []\n    cls_pred_dim = (\n        model.num_classes if cfg.RETINANET.SOFTMAX else (model.num_classes - 1)\n    )\n    # unpacked bbox feature and add prediction layers\n    bbox_regr_dim = (\n        4 * (model.num_classes - 1) if cfg.RETINANET.CLASS_SPECIFIC_BBOX else 4\n    )\n\n    # ==========================================================================\n    # classification tower with logits and prob prediction\n    # ==========================================================================\n    for lvl in range(k_min, k_max + 1):\n        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order\n        # classification tower stack convolution starts\n        for nconv in range(cfg.RETINANET.NUM_CONVS):\n            suffix = 'n{}_fpn{}'.format(nconv, lvl)\n            dim_in, dim_out = dim_in, dim_in\n            if lvl == k_min:\n                bl_out = model.Conv(\n                    bl_in,\n                    'retnet_cls_conv_' + suffix,\n                    dim_in,\n                    dim_out,\n                    3,\n                    stride=1,\n                    pad=1,\n                    weight_init=('GaussianFill', {\n                        'std': 0.01\n                    }),\n                    bias_init=('ConstantFill', {\n                        'value': 0.\n                    })\n                )\n            else:\n                bl_out = model.ConvShared(\n                    bl_in,\n                    'retnet_cls_conv_' + suffix,\n          
          dim_in,\n                    dim_out,\n                    3,\n                    stride=1,\n                    pad=1,\n                    weight='retnet_cls_conv_n{}_fpn{}_w'.format(nconv, k_min),\n                    bias='retnet_cls_conv_n{}_fpn{}_b'.format(nconv, k_min)\n                )\n            bl_in = model.Relu(bl_out, bl_out)\n            bl_feat = bl_in\n        # cls tower stack convolution ends. Add the logits layer now\n        if lvl == k_min:\n            retnet_cls_pred = model.Conv(\n                bl_feat,\n                'retnet_cls_pred_fpn{}'.format(lvl),\n                dim_in,\n                cls_pred_dim * A,\n                3,\n                pad=1,\n                stride=1,\n                weight_init=('GaussianFill', {\n                    'std': 0.01\n                }),\n                bias_init=bias_init\n            )\n        else:\n            retnet_cls_pred = model.ConvShared(\n                bl_feat,\n                'retnet_cls_pred_fpn{}'.format(lvl),\n                dim_in,\n                cls_pred_dim * A,\n                3,\n                pad=1,\n                stride=1,\n                weight='retnet_cls_pred_fpn{}_w'.format(k_min),\n                bias='retnet_cls_pred_fpn{}_b'.format(k_min)\n            )\n        if not model.train:\n            if cfg.RETINANET.SOFTMAX:\n                model.net.GroupSpatialSoftmax(\n                    retnet_cls_pred,\n                    'retnet_cls_prob_fpn{}'.format(lvl),\n                    num_classes=cls_pred_dim\n                )\n            else:\n                model.net.Sigmoid(\n                    retnet_cls_pred, 'retnet_cls_prob_fpn{}'.format(lvl)\n                )\n        if cfg.RETINANET.SHARE_CLS_BBOX_TOWER:\n            bbox_feat_list.append(bl_feat)\n\n    # ==========================================================================\n    # bbox tower if not sharing features with the classification tower with\n    # logits 
and prob prediction\n    # ==========================================================================\n    if not cfg.RETINANET.SHARE_CLS_BBOX_TOWER:\n        for lvl in range(k_min, k_max + 1):\n            bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order\n            for nconv in range(cfg.RETINANET.NUM_CONVS):\n                suffix = 'n{}_fpn{}'.format(nconv, lvl)\n                dim_in, dim_out = dim_in, dim_in\n                if lvl == k_min:\n                    bl_out = model.Conv(\n                        bl_in,\n                        'retnet_bbox_conv_' + suffix,\n                        dim_in,\n                        dim_out,\n                        3,\n                        stride=1,\n                        pad=1,\n                        weight_init=('GaussianFill', {\n                            'std': 0.01\n                        }),\n                        bias_init=('ConstantFill', {\n                            'value': 0.\n                        })\n                    )\n                else:\n                    bl_out = model.ConvShared(\n                        bl_in,\n                        'retnet_bbox_conv_' + suffix,\n                        dim_in,\n                        dim_out,\n                        3,\n                        stride=1,\n                        pad=1,\n                        weight='retnet_bbox_conv_n{}_fpn{}_w'.format(\n                            nconv, k_min\n                        ),\n                        bias='retnet_bbox_conv_n{}_fpn{}_b'.format(\n                            nconv, k_min\n                        )\n                    )\n                bl_in = model.Relu(bl_out, bl_out)\n                # Add octave scales and aspect ratio\n                # At least 1 convolution for dealing different aspect ratios\n                bl_feat = bl_in\n            bbox_feat_list.append(bl_feat)\n    # Depending on the features [shared/separate] for bbox, add prediction layer\n 
   for i, lvl in enumerate(range(k_min, k_max + 1)):\n        bbox_pred = 'retnet_bbox_pred_fpn{}'.format(lvl)\n        bl_feat = bbox_feat_list[i]\n        if lvl == k_min:\n            model.Conv(\n                bl_feat,\n                bbox_pred,\n                dim_in,\n                bbox_regr_dim * A,\n                3,\n                pad=1,\n                stride=1,\n                weight_init=('GaussianFill', {\n                    'std': 0.01\n                }),\n                bias_init=('ConstantFill', {\n                    'value': 0.\n                })\n            )\n        else:\n            model.ConvShared(\n                bl_feat,\n                bbox_pred,\n                dim_in,\n                bbox_regr_dim * A,\n                3,\n                pad=1,\n                stride=1,\n                weight='retnet_bbox_pred_fpn{}_w'.format(k_min),\n                bias='retnet_bbox_pred_fpn{}_b'.format(k_min)\n            )\n\n\ndef add_fpn_retinanet_losses(model):\n    loss_gradients = {}\n    gradients, losses = [], []\n\n    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid\n    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid\n\n    model.AddMetrics(['retnet_fg_num', 'retnet_bg_num'])\n    # ==========================================================================\n    # bbox regression loss - SelectSmoothL1Loss for multiple anchors at a location\n    # ==========================================================================\n    for lvl in range(k_min, k_max + 1):\n        suffix = 'fpn{}'.format(lvl)\n        bbox_loss = model.net.SelectSmoothL1Loss(\n            [\n                'retnet_bbox_pred_' + suffix,\n                'retnet_roi_bbox_targets_' + suffix,\n                'retnet_roi_fg_bbox_locs_' + suffix, 'retnet_fg_num'\n            ],\n            'retnet_loss_bbox_' + suffix,\n            beta=cfg.RETINANET.BBOX_REG_BETA,\n            scale=model.GetLossScale() * 
cfg.RETINANET.BBOX_REG_WEIGHT\n        )\n        gradients.append(bbox_loss)\n        losses.append('retnet_loss_bbox_' + suffix)\n\n    # ==========================================================================\n    # cls loss - depends on softmax/sigmoid outputs\n    # ==========================================================================\n    for lvl in range(k_min, k_max + 1):\n        suffix = 'fpn{}'.format(lvl)\n        cls_lvl_logits = 'retnet_cls_pred_' + suffix\n        if not cfg.RETINANET.SOFTMAX:\n            cls_focal_loss = model.net.SigmoidFocalLoss(\n                [\n                    cls_lvl_logits, 'retnet_cls_labels_' + suffix,\n                    'retnet_fg_num'\n                ],\n                ['fl_{}'.format(suffix)],\n                gamma=cfg.RETINANET.LOSS_GAMMA,\n                alpha=cfg.RETINANET.LOSS_ALPHA,\n                scale=model.GetLossScale(),\n                num_classes=model.num_classes - 1\n            )\n            gradients.append(cls_focal_loss)\n            losses.append('fl_{}'.format(suffix))\n        else:\n            cls_focal_loss, gated_prob = model.net.SoftmaxFocalLoss(\n                [\n                    cls_lvl_logits, 'retnet_cls_labels_' + suffix,\n                    'retnet_fg_num'\n                ],\n                ['fl_{}'.format(suffix), 'retnet_prob_{}'.format(suffix)],\n                gamma=cfg.RETINANET.LOSS_GAMMA,\n                alpha=cfg.RETINANET.LOSS_ALPHA,\n                scale=model.GetLossScale(),\n                num_classes=model.num_classes\n            )\n            gradients.append(cls_focal_loss)\n            losses.append('fl_{}'.format(suffix))\n\n    loss_gradients.update(blob_utils.get_loss_gradients(model, gradients))\n    model.AddLosses(losses)\n    return loss_gradients\n"
  },
  {
    "path": "detectron/modeling/rfcn_heads.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.c2 import const_fill\nfrom detectron.utils.c2 import gauss_fill\n\n\n# ---------------------------------------------------------------------------- #\n# R-FCN outputs and losses\n# ---------------------------------------------------------------------------- #\n\ndef add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):\n    if dim_reduce is not None:\n        # Optional dim reduction\n        blob_in = model.Conv(\n            blob_in,\n            'conv_dim_reduce',\n            dim_in,\n            dim_reduce,\n            kernel=1,\n            pad=0,\n            stride=1,\n            weight_init=gauss_fill(0.01),\n            bias_init=const_fill(0.0)\n        )\n        blob_in = model.Relu(blob_in, blob_in)\n        dim_in = dim_reduce\n    # Classification conv\n    model.Conv(\n        blob_in,\n        'conv_cls',\n        dim_in,\n        model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,\n        kernel=1,\n        pad=0,\n        stride=1,\n        weight_init=gauss_fill(0.01),\n        bias_init=const_fill(0.0)\n    )\n    # Bounding-box 
regression conv\n    num_bbox_reg_classes = (\n        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes\n    )\n    model.Conv(\n        blob_in,\n        'conv_bbox_pred',\n        dim_in,\n        4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,\n        kernel=1,\n        pad=0,\n        stride=1,\n        weight_init=gauss_fill(0.01),\n        bias_init=const_fill(0.0)\n    )\n    # Classification PS RoI pooling\n    model.net.PSRoIPool(\n        ['conv_cls', 'rois'], ['psroipooled_cls', '_mapping_channel_cls'],\n        group_size=cfg.RFCN.PS_GRID_SIZE,\n        output_dim=model.num_classes,\n        spatial_scale=spatial_scale\n    )\n    model.AveragePool(\n        'psroipooled_cls', 'cls_score_4d', kernel=cfg.RFCN.PS_GRID_SIZE\n    )\n    model.net.Reshape(\n        'cls_score_4d', ['cls_score', '_cls_scores_shape'],\n        shape=(-1, cfg.MODEL.NUM_CLASSES)\n    )\n    if not model.train:\n        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')\n    # Bbox regression PS RoI pooling\n    model.net.PSRoIPool(\n        ['conv_bbox_pred', 'rois'],\n        ['psroipooled_bbox', '_mapping_channel_bbox'],\n        group_size=cfg.RFCN.PS_GRID_SIZE,\n        output_dim=4 * num_bbox_reg_classes,\n        spatial_scale=spatial_scale\n    )\n    model.AveragePool(\n        'psroipooled_bbox', 'bbox_pred', kernel=cfg.RFCN.PS_GRID_SIZE\n    )\n"
  },
  {
    "path": "detectron/modeling/rpn_heads.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom detectron.core.config import cfg\nfrom detectron.modeling.generate_anchors import generate_anchors\nfrom detectron.utils.c2 import const_fill\nfrom detectron.utils.c2 import gauss_fill\nimport detectron.modeling.FPN as FPN\nimport detectron.utils.blob as blob_utils\n\n\n# ---------------------------------------------------------------------------- #\n# RPN and Faster R-CNN outputs and losses\n# ---------------------------------------------------------------------------- #\n\ndef add_generic_rpn_outputs(model, blob_in, dim_in, spatial_scale_in):\n    \"\"\"Add RPN outputs (objectness classification and bounding box regression)\n    to an RPN model. 
Abstracts away the use of FPN.\n    \"\"\"\n    loss_gradients = None\n    if cfg.FPN.FPN_ON:\n        # Delegate to the FPN module\n        FPN.add_fpn_rpn_outputs(model, blob_in, dim_in, spatial_scale_in)\n        if cfg.MODEL.FASTER_RCNN:\n            # CollectAndDistributeFpnRpnProposals also labels proposals when in\n            # training mode\n            model.CollectAndDistributeFpnRpnProposals()\n        if model.train:\n            loss_gradients = FPN.add_fpn_rpn_losses(model)\n    else:\n        # Not using FPN, add RPN to a single scale\n        add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale_in)\n        if model.train:\n            loss_gradients = add_single_scale_rpn_losses(model)\n    return loss_gradients\n\n\ndef add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):\n    \"\"\"Add RPN outputs to a single scale model (i.e., no FPN).\"\"\"\n    anchors = generate_anchors(\n        stride=1. / spatial_scale,\n        sizes=cfg.RPN.SIZES,\n        aspect_ratios=cfg.RPN.ASPECT_RATIOS\n    )\n    num_anchors = anchors.shape[0]\n    dim_out = dim_in\n    # RPN hidden representation\n    model.Conv(\n        blob_in,\n        'conv_rpn',\n        dim_in,\n        dim_out,\n        kernel=3,\n        pad=1,\n        stride=1,\n        weight_init=gauss_fill(0.01),\n        bias_init=const_fill(0.0)\n    )\n    model.Relu('conv_rpn', 'conv_rpn')\n    # Proposal classification scores\n    model.Conv(\n        'conv_rpn',\n        'rpn_cls_logits',\n        dim_in,\n        num_anchors,\n        kernel=1,\n        pad=0,\n        stride=1,\n        weight_init=gauss_fill(0.01),\n        bias_init=const_fill(0.0)\n    )\n    # Proposal bbox regression deltas\n    model.Conv(\n        'conv_rpn',\n        'rpn_bbox_pred',\n        dim_in,\n        4 * num_anchors,\n        kernel=1,\n        pad=0,\n        stride=1,\n        weight_init=gauss_fill(0.01),\n        bias_init=const_fill(0.0)\n    )\n\n    if not model.train or 
cfg.MODEL.FASTER_RCNN:\n        # Proposals are needed during:\n        #  1) inference (== not model.train) for RPN only and Faster R-CNN\n        #  OR\n        #  2) training for Faster R-CNN\n        # Otherwise (== training for RPN only), proposals are not needed\n        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')\n        model.GenerateProposals(\n            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],\n            ['rpn_rois', 'rpn_roi_probs'],\n            anchors=anchors,\n            spatial_scale=spatial_scale\n        )\n\n    if cfg.MODEL.FASTER_RCNN:\n        if model.train:\n            # Add op that generates training labels for in-network RPN proposals\n            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])\n        else:\n            # Alias rois to rpn_rois for inference\n            model.net.Alias('rpn_rois', 'rois')\n\n\ndef add_single_scale_rpn_losses(model):\n    \"\"\"Add losses for a single scale RPN model (i.e., no FPN).\"\"\"\n    # Spatially narrow the full-sized RPN label arrays to match the feature map\n    # shape\n    model.net.SpatialNarrowAs(\n        ['rpn_labels_int32_wide', 'rpn_cls_logits'], 'rpn_labels_int32'\n    )\n    for key in ('targets', 'inside_weights', 'outside_weights'):\n        model.net.SpatialNarrowAs(\n            ['rpn_bbox_' + key + '_wide', 'rpn_bbox_pred'], 'rpn_bbox_' + key\n        )\n    loss_rpn_cls = model.net.SigmoidCrossEntropyLoss(\n        ['rpn_cls_logits', 'rpn_labels_int32'],\n        'loss_rpn_cls',\n        scale=model.GetLossScale()\n    )\n    loss_rpn_bbox = model.net.SmoothL1Loss(\n        [\n            'rpn_bbox_pred', 'rpn_bbox_targets', 'rpn_bbox_inside_weights',\n            'rpn_bbox_outside_weights'\n        ],\n        'loss_rpn_bbox',\n        beta=1. 
/ 9.,\n        scale=model.GetLossScale()\n    )\n    loss_gradients = blob_utils.get_loss_gradients(\n        model, [loss_rpn_cls, loss_rpn_bbox]\n    )\n    model.AddLosses(['loss_rpn_cls', 'loss_rpn_bbox'])\n    return loss_gradients\n"
  },
  {
    "path": "detectron/ops/__init__.py",
    "content": ""
  },
  {
    "path": "detectron/ops/collect_and_distribute_fpn_rpn_proposals.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nfrom detectron.core.config import cfg\nfrom detectron.datasets import json_dataset\nfrom detectron.datasets import roidb as roidb_utils\nimport detectron.modeling.FPN as fpn\nimport detectron.roi_data.fast_rcnn as fast_rcnn_roi_data\nimport detectron.utils.blob as blob_utils\n\n\nclass CollectAndDistributeFpnRpnProposalsOp:\n    def __init__(self, train):\n        self._train = train\n\n    def forward(self, inputs, outputs):\n        \"\"\"See modeling.detector.CollectAndDistributeFpnRpnProposals for\n        inputs/outputs documentation.\n        \"\"\"\n        # inputs is\n        # [rpn_rois_fpn2, ..., rpn_rois_fpn6,\n        #  rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]\n        # If training with Faster R-CNN, then inputs will additionally include\n        #  + [roidb, im_info]\n        rois = collect(inputs, self._train)\n        if self._train:\n            # During training we reuse the data loader code. 
We populate roidb\n            # entries on the fly using the rois generated by RPN.\n            # im_info: [[im_height, im_width, im_scale], ...]\n            im_info = inputs[-1].data\n            im_scales = im_info[:, 2]\n            roidb = blob_utils.deserialize(inputs[-2].data)\n            # For historical consistency with the original Faster R-CNN\n            # implementation we are *not* filtering crowd proposals.\n            # This choice should be investigated in the future (it likely does\n            # not matter).\n            json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)\n            roidb_utils.add_bbox_regression_targets(roidb)\n            # Compute training labels for the RPN proposals; also handles\n            # distributing the proposals over FPN levels\n            output_blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names()\n            blobs = {k: [] for k in output_blob_names}\n            fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)\n            for i, k in enumerate(output_blob_names):\n                blob_utils.py_op_copy_blob(blobs[k], outputs[i])\n        else:\n            # For inference we have a special code path that avoids some data\n            # loader overhead\n            distribute(rois, None, outputs, self._train)\n\n\ndef collect(inputs, is_training):\n    cfg_key = 'TRAIN' if is_training else 'TEST'\n    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N\n    k_max = cfg.FPN.RPN_MAX_LEVEL\n    k_min = cfg.FPN.RPN_MIN_LEVEL\n    num_lvls = k_max - k_min + 1\n    roi_inputs = inputs[:num_lvls]\n    score_inputs = inputs[num_lvls:]\n    if is_training:\n        score_inputs = score_inputs[:-2]\n\n    # rois are in [[batch_idx, x0, y0, x1, y1], ...] 
format\n    # Combine predictions across all levels and retain the top scoring\n    rois = np.concatenate([blob.data for blob in roi_inputs])\n    scores = np.concatenate([blob.data for blob in score_inputs]).squeeze()\n    inds = np.argsort(-scores)[:post_nms_topN]\n    rois = rois[inds, :]\n    return rois\n\n\ndef distribute(rois, label_blobs, outputs, train):\n    \"\"\"To understand the output blob order see return value of\n    detectron.roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False)\n    \"\"\"\n    lvl_min = cfg.FPN.ROI_MIN_LEVEL\n    lvl_max = cfg.FPN.ROI_MAX_LEVEL\n    lvls = fpn.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max)\n\n    outputs[0].reshape(rois.shape)\n    outputs[0].data[...] = rois\n\n    # Create new roi blobs for each FPN level\n    # (See: modeling.FPN.add_multilevel_roi_blobs which is similar but annoying\n    # to generalize to support this particular case.)\n    rois_idx_order = np.empty((0, ))\n    for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)):\n        idx_lvl = np.where(lvls == lvl)[0]\n        blob_roi_level = rois[idx_lvl, :]\n        outputs[output_idx + 1].reshape(blob_roi_level.shape)\n        outputs[output_idx + 1].data[...] = blob_roi_level\n        rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))\n    rois_idx_restore = np.argsort(rois_idx_order)\n    blob_utils.py_op_copy_blob(rois_idx_restore.astype(np.int32), outputs[-1])\n"
  },
  {
    "path": "detectron/ops/generate_proposal_labels.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport logging\n\nfrom detectron.datasets import json_dataset\nfrom detectron.datasets import roidb as roidb_utils\nfrom detectron.utils import blob as blob_utils\nimport detectron.roi_data.fast_rcnn as fast_rcnn_roi_data\n\nlogger = logging.getLogger(__name__)\n\n\nclass GenerateProposalLabelsOp:\n\n    def forward(self, inputs, outputs):\n        \"\"\"See modeling.detector.GenerateProposalLabels for inputs/outputs\n        documentation.\n        \"\"\"\n        # During training we reuse the data loader code. 
We populate roidb\n        # entries on the fly using the rois generated by RPN.\n        # im_info: [[im_height, im_width, im_scale], ...]\n        rois = inputs[0].data\n        roidb = blob_utils.deserialize(inputs[1].data)\n        im_info = inputs[2].data\n        im_scales = im_info[:, 2]\n        output_blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names()\n        # For historical consistency with the original Faster R-CNN\n        # implementation we are *not* filtering crowd proposals.\n        # This choice should be investigated in the future (it likely does\n        # not matter).\n        json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)\n        roidb_utils.add_bbox_regression_targets(roidb)\n        blobs = {k: [] for k in output_blob_names}\n        fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)\n        for i, k in enumerate(output_blob_names):\n            blob_utils.py_op_copy_blob(blobs[k], outputs[i])\n"
  },
  {
    "path": "detectron/ops/generate_proposals.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick and Sean Bell\n# --------------------------------------------------------\n\nimport numpy as np\n\nfrom detectron.core.config import cfg\nimport detectron.utils.boxes as box_utils\n\n\nclass GenerateProposalsOp:\n    \"\"\"Output object detection proposals by applying estimated bounding-box\n    transformations to a set of regular boxes (called \"anchors\").\n\n    See comment in utils/boxes:bbox_transform_inv for details abouts the\n    optional `reg_weights` parameter.\n    \"\"\"\n\n    def __init__(self, anchors, spatial_scale, train, reg_weights=(1.0, 1.0, 1.0, 1.0)):\n        self._anchors = anchors\n        self._num_anchors = self._anchors.shape[0]\n        self._feat_stride = 1. / spatial_scale\n        self._train = train\n        self._reg_weights = reg_weights\n\n    def forward(self, inputs, outputs):\n        \"\"\"See modeling.detector.GenerateProposals for inputs/outputs\n        documentation.\n        \"\"\"\n        # 1. 
for each location i in a (H, W) grid:\n        #      generate A anchor boxes centered on cell i\n        #      apply predicted bbox deltas to each of the A anchors at cell i\n        # 2. clip predicted boxes to image\n        # 3. remove predicted boxes with either height or width < threshold\n        # 4. sort all (proposal, score) pairs by score from highest to lowest\n        # 5. take the top pre_nms_topN proposals before NMS\n        # 6. apply NMS with a loose threshold (0.7) to the remaining proposals\n        # 7. take after_nms_topN proposals after NMS\n        # 8. return the top proposals\n\n        # predicted probability of fg object for each RPN anchor\n        scores = inputs[0].data\n        # predicted anchor transformations\n        bbox_deltas = inputs[1].data\n        # input image (height, width, scale), in which scale is the scale factor\n        # applied to the original dataset image to get the network input image\n        im_info = inputs[2].data\n        # 1. 
Generate proposals from bbox deltas and shifted anchors\n        height, width = scores.shape[-2:]\n        # Enumerate all shifted positions on the (H, W) grid\n        shift_x = np.arange(0, width) * self._feat_stride\n        shift_y = np.arange(0, height) * self._feat_stride\n        shift_x, shift_y = np.meshgrid(shift_x, shift_y, copy=False)\n        # Convert to (K, 4), K=H*W, where the columns are (dx, dy, dx, dy)\n        # shift pointing to each grid location\n        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),\n                            shift_x.ravel(), shift_y.ravel())).transpose()\n\n        # Broadcast anchors over shifts to enumerate all anchors at all positions\n        # in the (H, W) grid:\n        #   - add A anchors of shape (1, A, 4) to\n        #   - K shifts of shape (K, 1, 4) to get\n        #   - all shifted anchors of shape (K, A, 4)\n        #   - reshape to (K*A, 4) shifted anchors\n        num_images = inputs[0].shape[0]\n        A = self._num_anchors\n        K = shifts.shape[0]\n        all_anchors = self._anchors[np.newaxis, :, :] + shifts[:, np.newaxis, :]\n        all_anchors = all_anchors.reshape((K * A, 4))\n\n        rois = np.empty((0, 5), dtype=np.float32)\n        roi_probs = np.empty((0, 1), dtype=np.float32)\n        for im_i in range(num_images):\n            im_i_boxes, im_i_probs = self.proposals_for_one_image(\n                im_info[im_i, :], all_anchors, bbox_deltas[im_i, :, :, :],\n                scores[im_i, :, :, :]\n            )\n            batch_inds = im_i * np.ones(\n                (im_i_boxes.shape[0], 1), dtype=np.float32\n            )\n            im_i_rois = np.hstack((batch_inds, im_i_boxes))\n            rois = np.append(rois, im_i_rois, axis=0)\n            roi_probs = np.append(roi_probs, im_i_probs, axis=0)\n\n        outputs[0].reshape(rois.shape)\n        outputs[0].data[...] 
= rois\n        if len(outputs) > 1:\n            outputs[1].reshape(roi_probs.shape)\n            outputs[1].data[...] = roi_probs\n\n    def proposals_for_one_image(\n        self, im_info, all_anchors, bbox_deltas, scores\n    ):\n        # Get mode-dependent configuration\n        cfg_key = 'TRAIN' if self._train else 'TEST'\n        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N\n        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N\n        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH\n        min_size = cfg[cfg_key].RPN_MIN_SIZE\n        # Transpose and reshape predicted bbox transformations to get them\n        # into the same order as the anchors:\n        #   - bbox deltas will be (4 * A, H, W) format from conv output\n        #   - transpose to (H, W, 4 * A)\n        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)\n        #     in slowest to fastest order to match the enumerated anchors\n        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))\n\n        # Same story for the scores:\n        #   - scores are (A, H, W) format from conv output\n        #   - transpose to (H, W, A)\n        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)\n        #     to match the order of anchors and bbox_deltas\n        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))\n\n        # 4. sort all (proposal, score) pairs by score from highest to lowest\n        # 5. take top pre_nms_topN (e.g. 
6000)\n        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):\n            order = np.argsort(-scores.squeeze())\n        else:\n            # Avoid sorting possibly large arrays; First partition to get top K\n            # unsorted and then sort just those (~20x faster for 200k scores)\n            inds = np.argpartition(\n                -scores.squeeze(), pre_nms_topN\n            )[:pre_nms_topN]\n            order = np.argsort(-scores[inds].squeeze())\n            order = inds[order]\n        bbox_deltas = bbox_deltas[order, :]\n        all_anchors = all_anchors[order, :]\n        scores = scores[order]\n\n        # Transform anchors into proposals via bbox transformations\n        proposals = box_utils.bbox_transform(all_anchors, bbox_deltas, self._reg_weights)\n\n        # 2. clip proposals to image (may result in proposals with zero area\n        # that will be removed in the next step)\n        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])\n\n        # 3. remove predicted boxes with either height or width < min_size\n        keep = _filter_boxes(proposals, min_size, im_info)\n        proposals = proposals[keep, :]\n        scores = scores[keep]\n\n        # 6. apply loose nms (e.g. threshold = 0.7)\n        # 7. take after_nms_topN (e.g. 300)\n        # 8. 
return the top proposals (-> RoIs top)\n        if nms_thresh > 0:\n            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)\n            if post_nms_topN > 0:\n                keep = keep[:post_nms_topN]\n            proposals = proposals[keep, :]\n            scores = scores[keep]\n        return proposals, scores\n\n\ndef _filter_boxes(boxes, min_size, im_info):\n    \"\"\"Only keep boxes with both sides >= min_size and center within the image.\n    \"\"\"\n    # Compute the width and height of the proposal boxes as measured in the original\n    # image coordinate system (this is required to avoid \"Negative Areas Found\"\n    # assertions in other parts of the code that measure).\n    im_scale = im_info[2]\n    ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1\n    hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1\n    # To avoid numerical issues we require the min_size to be at least 1 pixel in the\n    # original image\n    min_size = np.maximum(min_size, 1)\n    # Proposal center is computed relative to the scaled input image\n    ws = boxes[:, 2] - boxes[:, 0] + 1\n    hs = boxes[:, 3] - boxes[:, 1] + 1\n    x_ctr = boxes[:, 0] + ws / 2.\n    y_ctr = boxes[:, 1] + hs / 2.\n    keep = np.where(\n        (ws_orig_scale >= min_size)\n        & (hs_orig_scale >= min_size)\n        & (x_ctr < im_info[1])\n        & (y_ctr < im_info[0])\n    )[0]\n    return keep\n"
  },
  {
    "path": "detectron/ops/zero_even_op.cc",
    "content": "/**\n * Copyright (c) 2016-present, Facebook, Inc.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"zero_even_op.h\"\n\nnamespace caffe2 {\n\ntemplate <>\nbool ZeroEvenOp<float, CPUContext>::RunOnDevice() {\n  // Retrieve the input tensor.\n  const auto& X = Input(0);\n  CAFFE_ENFORCE(X.dim() == 1);\n\n  // Initialize the output tensor to a copy of the input tensor.\n  auto* Y = Output(0);\n  Y->CopyFrom(X);\n\n  // Set output elements at even indices to zero.\n  auto* Y_data = Y->mutable_data<float>();\n  for (auto i = 0; i < Y->numel(); i += 2) {\n    Y_data[i] = 0.0f;\n  }\n\n  return true;\n}\n\nREGISTER_CPU_OPERATOR(ZeroEven, ZeroEvenOp<float, CPUContext>);\n\nOPERATOR_SCHEMA(ZeroEven)\n    .NumInputs(1)\n    .NumOutputs(1)\n    .Input(\n        0,\n        \"X\",\n        \"1D input tensor\")\n    .Output(\n        0,\n        \"Y\",\n        \"1D output tensor\");\n\n} // namespace caffe2\n"
  },
  {
    "path": "detectron/ops/zero_even_op.cu",
    "content": "/**\n * Copyright (c) 2016-present, Facebook, Inc.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"caffe2/core/context_gpu.h\"\n\n#include \"zero_even_op.h\"\n\nnamespace caffe2 {\n\nnamespace {\n\ntemplate <typename T>\n__global__ void SetEvenIndsToVal(size_t num_even_inds, T val, T* data) {\n  CUDA_1D_KERNEL_LOOP(i, num_even_inds) {\n    data[i << 1] = val;\n  }\n}\n\n} // namespace\n\ntemplate <>\nbool ZeroEvenOp<float, CUDAContext>::RunOnDevice() {\n  // Retrieve the input tensor.\n  const auto& X = Input(0);\n  CAFFE_ENFORCE(X.ndim() == 1);\n\n  // Initialize the output tensor to a copy of the input tensor.\n  auto* Y = Output(0);\n  Y->CopyFrom(X);\n\n  // Set output elements at even indices to zero.\n  auto output_size = Y->size();\n\n  if (output_size > 0) {\n    size_t num_even_inds = output_size / 2 + output_size % 2;\n    SetEvenIndsToVal<float>\n        <<<CAFFE_GET_BLOCKS(num_even_inds),\n           CAFFE_CUDA_NUM_THREADS,\n           0,\n           context_.cuda_stream()>>>(\n            num_even_inds,\n            0.0f,\n            Y->mutable_data<float>());\n  }\n\n  return true;\n}\n\nREGISTER_CUDA_OPERATOR(ZeroEven, ZeroEvenOp<float, CUDAContext>);\n\n} // namespace caffe2\n"
  },
  {
    "path": "detectron/ops/zero_even_op.h",
    "content": "/**\n * Copyright (c) 2016-present, Facebook, Inc.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef ZERO_EVEN_OP_H_\n#define ZERO_EVEN_OP_H_\n\n#include \"caffe2/core/context.h\"\n#include \"caffe2/core/operator.h\"\n\nnamespace caffe2 {\n\n/**\n * ZeroEven operator. Zeros elements at even indices of an 1D array.\n * Elements at odd indices are preserved.\n *\n * This toy operator is an example of a custom operator and may be a useful\n * reference for adding new custom operators to the Detectron codebase.\n */\ntemplate <typename T, class Context>\nclass ZeroEvenOp final : public Operator<Context> {\n public:\n  // Introduce Operator<Context> helper members.\n  USE_OPERATOR_CONTEXT_FUNCTIONS;\n\n  ZeroEvenOp(const OperatorDef& operator_def, Workspace* ws)\n      : Operator<Context>(operator_def, ws) {}\n\n  bool RunOnDevice() override;\n};\n\n} // namespace caffe2\n\n#endif // ZERO_EVEN_OP_H_\n"
  },
  {
    "path": "detectron/roi_data/__init__.py",
    "content": ""
  },
  {
    "path": "detectron/roi_data/data_utils.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Common utility functions for RPN and RetinaNet minibtach blobs preparation.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom collections import namedtuple\nimport logging\nimport numpy as np\nimport threading\n\nfrom detectron.core.config import cfg\nfrom detectron.modeling.generate_anchors import generate_anchors\nimport detectron.utils.boxes as box_utils\n\nlogger = logging.getLogger(__name__)\n\n\n# octave and aspect fields are only used on RetinaNet. 
Octave corresponds to the\n# scale of the anchor and aspect denotes which aspect ratio is used in the range\n# of aspect ratios\nFieldOfAnchors = namedtuple(\n    'FieldOfAnchors', [\n        'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size',\n        'octave', 'aspect'\n    ]\n)\n\n# Cache for memoizing _get_field_of_anchors\n_threadlocal_foa = threading.local()\n\n\ndef get_field_of_anchors(\n    stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None\n):\n    global _threadlocal_foa\n    if not hasattr(_threadlocal_foa, 'cache'):\n        _threadlocal_foa.cache = {}\n\n    cache_key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios)\n    if cache_key in _threadlocal_foa.cache:\n        return _threadlocal_foa.cache[cache_key]\n\n    # Anchors at a single feature cell\n    cell_anchors = generate_anchors(\n        stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios\n    )\n    num_cell_anchors = cell_anchors.shape[0]\n\n    # Generate canonical proposals from shifted anchors\n    # Enumerate all shifted positions on the (H, W) grid\n    fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil(\n        cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE)\n    )\n    field_size = int(np.ceil(fpn_max_size / float(stride)))\n    shifts = np.arange(0, field_size) * stride\n    shift_x, shift_y = np.meshgrid(shifts, shifts)\n    shift_x = shift_x.ravel()\n    shift_y = shift_y.ravel()\n    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()\n\n    # Broacast anchors over shifts to enumerate all anchors at all positions\n    # in the (H, W) grid:\n    #   - add A cell anchors of shape (1, A, 4) to\n    #   - K shifts of shape (K, 1, 4) to get\n    #   - all shifted anchors of shape (K, A, 4)\n    #   - reshape to (K*A, 4) shifted anchors\n    A = num_cell_anchors\n    K = shifts.shape[0]\n    field_of_anchors = (\n        cell_anchors.reshape((1, A, 4)) +\n        shifts.reshape((1, K, 4)).transpose((1, 0, 
2))\n    )\n    field_of_anchors = field_of_anchors.reshape((K * A, 4))\n    foa = FieldOfAnchors(\n        field_of_anchors=field_of_anchors.astype(np.float32),\n        num_cell_anchors=num_cell_anchors,\n        stride=stride,\n        field_size=field_size,\n        octave=octave,\n        aspect=aspect\n    )\n    _threadlocal_foa.cache[cache_key] = foa\n    return foa\n\n\ndef unmap(data, count, inds, fill=0):\n    \"\"\"Unmap a subset of items (data) back to the original set of items (of\n    size count)\"\"\"\n    if count == len(inds):\n        return data\n\n    if len(data.shape) == 1:\n        ret = np.empty((count, ), dtype=data.dtype)\n        ret.fill(fill)\n        ret[inds] = data\n    else:\n        ret = np.empty((count, ) + data.shape[1:], dtype=data.dtype)\n        ret.fill(fill)\n        ret[inds, :] = data\n    return ret\n\n\ndef compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):\n    \"\"\"Compute bounding-box regression targets for an image.\"\"\"\n    return box_utils.bbox_transform_inv(ex_rois, gt_rois, weights).astype(\n        np.float32, copy=False\n    )\n"
  },
  {
    "path": "detectron/roi_data/fast_rcnn.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Construct minibatches for Fast R-CNN training. Handles the minibatch blobs\nthat are specific to Fast R-CNN. Other blobs that are generic to RPN, etc.\nare handled by their respecitive roi_data modules.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport logging\nimport numpy as np\nimport numpy.random as npr\n\nfrom detectron.core.config import cfg\nimport detectron.modeling.FPN as fpn\nimport detectron.roi_data.keypoint_rcnn as keypoint_rcnn_roi_data\nimport detectron.roi_data.mask_rcnn as mask_rcnn_roi_data\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.boxes as box_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef get_fast_rcnn_blob_names(is_training=True):\n    \"\"\"Fast R-CNN blob names.\"\"\"\n    # rois blob: holds R regions of interest, each is a 5-tuple\n    # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a\n    # rectangle (x1, y1, x2, y2)\n    blob_names = ['rois']\n    if is_training:\n        # labels_int32 blob: R categorical labels in [0, ..., K] for K\n        # foreground classes plus background\n        blob_names += ['labels_int32']\n    if is_training:\n        # bbox_targets blob: R 
bounding-box regression targets with 4\n        # targets per class\n        blob_names += ['bbox_targets']\n        # bbox_inside_weights blob: At most 4 targets per roi are active\n        # this binary vector sepcifies the subset of active targets\n        blob_names += ['bbox_inside_weights']\n        blob_names += ['bbox_outside_weights']\n    if is_training and cfg.MODEL.MASK_ON:\n        # 'mask_rois': RoIs sampled for training the mask prediction branch.\n        # Shape is (#masks, 5) in format (batch_idx, x1, y1, x2, y2).\n        blob_names += ['mask_rois']\n        # 'roi_has_mask': binary labels for the RoIs specified in 'rois'\n        # indicating if each RoI has a mask or not. Note that in some cases\n        # a *bg* RoI will have an all -1 (ignore) mask associated with it in\n        # the case that no fg RoIs can be sampled. Shape is (batchsize).\n        blob_names += ['roi_has_mask_int32']\n        # 'masks_int32' holds binary masks for the RoIs specified in\n        # 'mask_rois'. Shape is (#fg, M * M) where M is the ground truth\n        # mask size.\n        blob_names += ['masks_int32']\n    if is_training and cfg.MODEL.KEYPOINTS_ON:\n        # 'keypoint_rois': RoIs sampled for training the keypoint prediction\n        # branch. Shape is (#instances, 5) in format (batch_idx, x1, y1, x2,\n        # y2).\n        blob_names += ['keypoint_rois']\n        # 'keypoint_locations_int32': index of keypoint in\n        # KRCNN.HEATMAP_SIZE**2 sized array. Shape is (#instances). Used in\n        # SoftmaxWithLoss.\n        blob_names += ['keypoint_locations_int32']\n        # 'keypoint_weights': weight assigned to each target in\n        # 'keypoint_locations_int32'. Shape is (#instances). 
Used in\n        # SoftmaxWithLoss.\n        blob_names += ['keypoint_weights']\n        # 'keypoint_loss_normalizer': optional normalization factor to use if\n        # cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.\n        blob_names += ['keypoint_loss_normalizer']\n    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:\n        # Support for FPN multi-level rois without bbox reg isn't\n        # implemented (... and may never be implemented)\n        k_max = cfg.FPN.ROI_MAX_LEVEL\n        k_min = cfg.FPN.ROI_MIN_LEVEL\n        # Same format as rois blob, but one per FPN level\n        for lvl in range(k_min, k_max + 1):\n            blob_names += ['rois_fpn' + str(lvl)]\n        blob_names += ['rois_idx_restore_int32']\n        if is_training:\n            if cfg.MODEL.MASK_ON:\n                for lvl in range(k_min, k_max + 1):\n                    blob_names += ['mask_rois_fpn' + str(lvl)]\n                blob_names += ['mask_rois_idx_restore_int32']\n            if cfg.MODEL.KEYPOINTS_ON:\n                for lvl in range(k_min, k_max + 1):\n                    blob_names += ['keypoint_rois_fpn' + str(lvl)]\n                blob_names += ['keypoint_rois_idx_restore_int32']\n    return blob_names\n\n\ndef add_fast_rcnn_blobs(blobs, im_scales, roidb):\n    \"\"\"Add blobs needed for training Fast R-CNN style models.\"\"\"\n    # Sample training RoIs from each image and append them to the blob lists\n    for im_i, entry in enumerate(roidb):\n        frcn_blobs = _sample_rois(entry, im_scales[im_i], im_i)\n        for k, v in frcn_blobs.items():\n            blobs[k].append(v)\n    # Concat the training blob lists into tensors\n    for k, v in blobs.items():\n        if isinstance(v, list) and len(v) > 0:\n            blobs[k] = np.concatenate(v)\n    # Add FPN multilevel training RoIs, if configured\n    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:\n        _add_multilevel_rois(blobs)\n\n    # Perform any final work and validity checks after the collating 
blobs for\n    # all minibatch images\n    valid = True\n    if cfg.MODEL.KEYPOINTS_ON:\n        valid = keypoint_rcnn_roi_data.finalize_keypoint_minibatch(blobs, valid)\n\n    return valid\n\n\ndef _sample_rois(roidb, im_scale, batch_idx):\n    \"\"\"Generate a random sample of RoIs comprising foreground and background\n    examples.\n    \"\"\"\n    rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM)\n    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))\n    max_overlaps = roidb['max_overlaps']\n\n    # Select foreground RoIs as those with >= FG_THRESH overlap\n    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]\n    # Guard against the case when an image has fewer than fg_rois_per_image\n    # foreground RoIs\n    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)\n    # Sample foreground regions without replacement\n    if fg_inds.size > 0:\n        fg_inds = npr.choice(\n            fg_inds, size=fg_rois_per_this_image, replace=False\n        )\n\n    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)\n    bg_inds = np.where(\n        (max_overlaps < cfg.TRAIN.BG_THRESH_HI) &\n        (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)\n    )[0]\n    # Compute number of background RoIs to take from this image (guarding\n    # against there being fewer than desired)\n    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image\n    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size)\n    # Sample foreground regions without replacement\n    if bg_inds.size > 0:\n        bg_inds = npr.choice(\n            bg_inds, size=bg_rois_per_this_image, replace=False\n        )\n\n    # The indices that we're selecting (both fg and bg)\n    keep_inds = np.append(fg_inds, bg_inds)\n    # Label is the class each RoI has max overlap with\n    sampled_labels = roidb['max_classes'][keep_inds]\n    sampled_labels[fg_rois_per_this_image:] = 0  # Label bg RoIs with class 0\n    sampled_boxes = 
roidb['boxes'][keep_inds]\n\n    bbox_targets, bbox_inside_weights = _expand_bbox_targets(\n        roidb['bbox_targets'][keep_inds, :]\n    )\n    bbox_outside_weights = np.array(\n        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype\n    )\n\n    # Scale rois and format as (batch_idx, x1, y1, x2, y2)\n    sampled_rois = sampled_boxes * im_scale\n    repeated_batch_idx = batch_idx * blob_utils.ones((sampled_rois.shape[0], 1))\n    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))\n\n    # Base Fast R-CNN blobs\n    blob_dict = dict(\n        labels_int32=sampled_labels.astype(np.int32, copy=False),\n        rois=sampled_rois,\n        bbox_targets=bbox_targets,\n        bbox_inside_weights=bbox_inside_weights,\n        bbox_outside_weights=bbox_outside_weights\n    )\n\n    # Optionally add Mask R-CNN blobs\n    if cfg.MODEL.MASK_ON:\n        mask_rcnn_roi_data.add_mask_rcnn_blobs(\n            blob_dict, sampled_boxes, roidb, im_scale, batch_idx\n        )\n\n    # Optionally add Keypoint R-CNN blobs\n    if cfg.MODEL.KEYPOINTS_ON:\n        keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(\n            blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx\n        )\n\n    return blob_dict\n\n\ndef _expand_bbox_targets(bbox_target_data):\n    \"\"\"Bounding-box regression targets are stored in a compact form in the\n    roidb.\n\n    This function expands those targets into the 4-of-4*K representation used\n    by the network (i.e. only one class has non-zero targets). 
The loss weights\n    are similarly expanded.\n\n    Returns:\n        bbox_target_data (ndarray): N x 4K blob of regression targets\n        bbox_inside_weights (ndarray): N x 4K blob of loss weights\n    \"\"\"\n    num_bbox_reg_classes = cfg.MODEL.NUM_CLASSES\n    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:\n        num_bbox_reg_classes = 2  # bg and fg\n\n    clss = bbox_target_data[:, 0]\n    bbox_targets = blob_utils.zeros((clss.size, 4 * num_bbox_reg_classes))\n    bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)\n    inds = np.where(clss > 0)[0]\n    for ind in inds:\n        cls = int(clss[ind])\n        start = 4 * cls\n        end = start + 4\n        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]\n        bbox_inside_weights[ind, start:end] = (1.0, 1.0, 1.0, 1.0)\n    return bbox_targets, bbox_inside_weights\n\n\ndef _add_multilevel_rois(blobs):\n    \"\"\"By default training RoIs are added for a single feature map level only.\n    When using FPN, the RoIs must be distributed over different FPN levels\n    according the level assignment heuristic (see: modeling.FPN.\n    map_rois_to_fpn_levels).\n    \"\"\"\n    lvl_min = cfg.FPN.ROI_MIN_LEVEL\n    lvl_max = cfg.FPN.ROI_MAX_LEVEL\n\n    def _distribute_rois_over_fpn_levels(rois_blob_name):\n        \"\"\"Distribute rois over the different FPN levels.\"\"\"\n        # Get target level for each roi\n        # Recall blob rois are in (batch_idx, x1, y1, x2, y2) format, hence take\n        # the box coordinates from columns 1:5\n        target_lvls = fpn.map_rois_to_fpn_levels(\n            blobs[rois_blob_name][:, 1:5], lvl_min, lvl_max\n        )\n        # Add per FPN level roi blobs named like: <rois_blob_name>_fpn<lvl>\n        fpn.add_multilevel_roi_blobs(\n            blobs, rois_blob_name, blobs[rois_blob_name], target_lvls, lvl_min,\n            lvl_max\n        )\n\n    _distribute_rois_over_fpn_levels('rois')\n    if cfg.MODEL.MASK_ON:\n        
_distribute_rois_over_fpn_levels('mask_rois')\n    if cfg.MODEL.KEYPOINTS_ON:\n        _distribute_rois_over_fpn_levels('keypoint_rois')\n"
  },
  {
    "path": "detectron/roi_data/keypoint_rcnn.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Construct minibatches for Mask R-CNN training when keypoints are enabled.\nHandles the minibatch blobs that are specific to training Mask R-CNN for\nkeypoint detection. Other blobs that are generic to RPN or Fast/er R-CNN are\nhandled by their respecitive roi_data modules.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport logging\nimport numpy as np\n\nfrom detectron.core.config import cfg\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.keypoints as keypoint_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef add_keypoint_rcnn_blobs(\n    blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx\n):\n    \"\"\"Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.\"\"\"\n    # Note: gt_inds must match how they're computed in\n    # datasets.json_dataset._merge_proposal_boxes_into_roidb\n    gt_inds = np.where(roidb['gt_classes'] > 0)[0]\n    max_overlaps = roidb['max_overlaps']\n    gt_keypoints = roidb['gt_keypoints']\n\n    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]\n    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])\n    vis_kp = gt_keypoints[ind_kp, 2, :] > 0\n    is_visible = 
np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0\n    kp_fg_inds = np.where(\n        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible)\n    )[0]\n\n    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)\n    if kp_fg_inds.size > kp_fg_rois_per_this_image:\n        kp_fg_inds = np.random.choice(\n            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False\n        )\n\n    sampled_fg_rois = roidb['boxes'][kp_fg_inds]\n    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]\n\n    num_keypoints = gt_keypoints.shape[2]\n    sampled_keypoints = -np.ones(\n        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),\n        dtype=gt_keypoints.dtype\n    )\n    for ii in range(len(sampled_fg_rois)):\n        ind = box_to_gt_ind_map[ii]\n        if ind >= 0:\n            sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]\n            assert np.sum(sampled_keypoints[ii, 2, :]) > 0\n\n    heats, weights = keypoint_utils.keypoints_to_heatmap_labels(\n        sampled_keypoints, sampled_fg_rois\n    )\n\n    shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)\n    heats = heats.reshape(shape)\n    weights = weights.reshape(shape)\n\n    sampled_fg_rois *= im_scale\n    repeated_batch_idx = batch_idx * blob_utils.ones(\n        (sampled_fg_rois.shape[0], 1)\n    )\n    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))\n\n    blobs['keypoint_rois'] = sampled_fg_rois\n    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)\n    blobs['keypoint_weights'] = weights\n\n\ndef finalize_keypoint_minibatch(blobs, valid):\n    \"\"\"Finalize the minibatch after blobs for all minibatch images have been\n    collated.\n    \"\"\"\n    min_count = cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH\n    num_visible_keypoints = np.sum(blobs['keypoint_weights'])\n    valid = (\n        valid and len(blobs['keypoint_weights']) > 0 and\n        num_visible_keypoints > 
min_count\n    )\n    # Normalizer to use if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.\n    # See modeling.model_builder.add_keypoint_losses\n    norm = num_visible_keypoints / (\n        cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM *\n        cfg.TRAIN.FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS\n    )\n    blobs['keypoint_loss_normalizer'] = np.array(norm, dtype=np.float32)\n    return valid\n\n\ndef _within_box(points, boxes):\n    \"\"\"Validate which keypoints are contained inside a given box.\n\n    points: Nx2xK (row 0 = x, row 1 = y; extra rows are ignored)\n    boxes: Nx4\n    output: NxK\n    \"\"\"\n    x_within = np.logical_and(\n        points[:, 0, :] >= np.expand_dims(boxes[:, 0], axis=1),\n        points[:, 0, :] <= np.expand_dims(boxes[:, 2], axis=1)\n    )\n    y_within = np.logical_and(\n        points[:, 1, :] >= np.expand_dims(boxes[:, 1], axis=1),\n        points[:, 1, :] <= np.expand_dims(boxes[:, 3], axis=1)\n    )\n    return np.logical_and(x_within, y_within)\n"
  },
  {
    "path": "detectron/roi_data/loader.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Detectron data loader. The design is generic and abstracted away from any\ndetails of the minibatch. A minibatch is a dictionary of blob name keys and\ntheir associated numpy (float32 or int32) ndarray values.\n\nOutline of the data loader design:\n\nloader thread\\\nloader thread \\                    / GPU 1 enqueue thread -> feed -> EnqueueOp\n...           -> minibatch queue ->  ...\nloader thread /                    \\ GPU N enqueue thread -> feed -> EnqueueOp\nloader thread/\n\n<---------------------------- CPU -----------------------------|---- GPU ---->\n\nA pool of loader threads construct minibatches that are put onto the shared\nminibatch queue. 
Each GPU has an enqueue thread that pulls a minibatch off the\nminibatch queue, feeds the minibatch blobs into the workspace, and then runs\nan EnqueueBlobsOp to place the minibatch blobs into the GPU's blobs queue.\nDuring each fprop the first thing the network does is run a DequeueBlobsOp\nin order to populate the workspace with the blobs from a queued minibatch.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom collections import deque\nfrom collections import OrderedDict\nimport logging\nimport numpy as np\nimport signal\nimport threading\nimport time\nimport uuid\nfrom six.moves import queue as Queue\n\nfrom caffe2.python import core, workspace\n\nfrom detectron.core.config import cfg\nfrom detectron.roi_data.minibatch import get_minibatch\nfrom detectron.roi_data.minibatch import get_minibatch_blob_names\nfrom detectron.utils.coordinator import coordinated_get\nfrom detectron.utils.coordinator import coordinated_put\nfrom detectron.utils.coordinator import Coordinator\nimport detectron.utils.c2 as c2_utils\n\nlogger = logging.getLogger(__name__)\n\n\nclass RoIDataLoader:\n    def __init__(\n        self,\n        roidb,\n        num_loaders=4,\n        minibatch_queue_size=64,\n        blobs_queue_capacity=8\n    ):\n        self._roidb = roidb\n        self._lock = threading.Lock()\n        self._perm = deque(range(len(self._roidb)))\n        self._cur = 0  # _perm cursor\n        # The minibatch queue holds prepared training data in host (CPU) memory\n        # When training with N > 1 GPUs, each element in the minibatch queue\n        # is actually a partial minibatch which contributes 1 / N of the\n        # examples to the overall minibatch\n        self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)\n        self._blobs_queue_capacity = blobs_queue_capacity\n        # Random queue name in case one instantiates multple 
RoIDataLoaders\n        self._loader_id = uuid.uuid4()\n        self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)\n        # Loader threads construct (partial) minibatches and put them on the\n        # minibatch queue\n        self._num_loaders = num_loaders\n        self._num_gpus = cfg.NUM_GPUS\n        self.coordinator = Coordinator()\n\n        self._output_names = get_minibatch_blob_names()\n        self._shuffle_roidb_inds()\n        self.create_threads()\n\n    def minibatch_loader_thread(self):\n        \"\"\"Load mini-batches and put them onto the mini-batch queue.\"\"\"\n        with self.coordinator.stop_on_exception():\n            while not self.coordinator.should_stop():\n                blobs = self.get_next_minibatch()\n                # Blobs must be queued in the order specified by\n                # self.get_output_names\n                ordered_blobs = OrderedDict()\n                for key in self.get_output_names():\n                    assert blobs[key].dtype in (np.int32, np.float32), \\\n                        'Blob {} of dtype {} must have dtype of ' \\\n                        'np.int32 or np.float32'.format(key, blobs[key].dtype)\n                    ordered_blobs[key] = blobs[key]\n                coordinated_put(\n                    self.coordinator, self._minibatch_queue, ordered_blobs\n                )\n        logger.info('Stopping mini-batch loading thread')\n\n    def enqueue_blobs_thread(self, gpu_id, blob_names):\n        \"\"\"Transfer mini-batches from a mini-batch queue to a BlobsQueue.\"\"\"\n        with self.coordinator.stop_on_exception():\n            while not self.coordinator.should_stop():\n                if self._minibatch_queue.qsize == 0:\n                    logger.warning('Mini-batch queue is empty')\n                blobs = coordinated_get(self.coordinator, self._minibatch_queue)\n                self.enqueue_blobs(gpu_id, blob_names, blobs.values())\n                logger.debug(\n      
              'batch queue size {}'.format(self._minibatch_queue.qsize())\n                )\n            logger.info('Stopping enqueue thread')\n\n    def get_next_minibatch(self):\n        \"\"\"Return the blobs to be used for the next minibatch. Thread safe.\"\"\"\n        valid = False\n        while not valid:\n            db_inds = self._get_next_minibatch_inds()\n            minibatch_db = [self._roidb[i] for i in db_inds]\n            blobs, valid = get_minibatch(minibatch_db)\n        return blobs\n\n    def _shuffle_roidb_inds(self):\n        \"\"\"Randomly permute the training roidb. Not thread safe.\"\"\"\n        if cfg.TRAIN.ASPECT_GROUPING:\n            widths = np.array([r['width'] for r in self._roidb])\n            heights = np.array([r['height'] for r in self._roidb])\n            horz = (widths >= heights)\n            vert = np.logical_not(horz)\n            horz_inds = np.where(horz)[0]\n            vert_inds = np.where(vert)[0]\n\n            horz_inds = np.random.permutation(horz_inds)\n            vert_inds = np.random.permutation(vert_inds)\n            mb = cfg.TRAIN.IMS_PER_BATCH\n            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]\n            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]\n            inds = np.hstack((horz_inds, vert_inds))\n\n            inds = np.reshape(inds, (-1, mb))\n            row_perm = np.random.permutation(np.arange(inds.shape[0]))\n            inds = np.reshape(inds[row_perm, :], (-1, ))\n            self._perm = inds\n        else:\n            self._perm = np.random.permutation(np.arange(len(self._roidb)))\n        self._perm = deque(self._perm)\n        self._cur = 0\n\n    def _get_next_minibatch_inds(self):\n        \"\"\"Return the roidb indices for the next minibatch. 
Thread safe.\"\"\"\n        with self._lock:\n            # We use a deque and always take the *first* IMS_PER_BATCH items\n            # followed by *rotating* the deque so that we see fresh items\n            # each time. If the length of _perm is not divisible by\n            # IMS_PER_BATCH, then we end up wrapping around the permutation.\n            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]\n            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)\n            self._cur += cfg.TRAIN.IMS_PER_BATCH\n            if self._cur >= len(self._perm):\n                self._shuffle_roidb_inds()\n        return db_inds\n\n    def get_output_names(self):\n        return self._output_names\n\n    def enqueue_blobs(self, gpu_id, blob_names, blobs):\n        \"\"\"Put a mini-batch on a BlobsQueue.\"\"\"\n        assert len(blob_names) == len(blobs)\n        t = time.time()\n        dev = c2_utils.CudaDevice(gpu_id)\n        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)\n        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]\n        for (blob_name, blob) in zip(blob_names, blobs):\n            workspace.FeedBlob(blob_name, blob, device_option=dev)\n        logger.debug(\n            'enqueue_blobs {}: workspace.FeedBlob: {}'.\n            format(gpu_id, time.time() - t)\n        )\n        t = time.time()\n        op = core.CreateOperator(\n            'SafeEnqueueBlobs', [queue_name] + blob_names,\n            blob_names + [queue_name + '_enqueue_status'],\n            device_option=dev\n        )\n        workspace.RunOperatorOnce(op)\n        logger.debug(\n            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.\n            format(gpu_id, time.time() - t)\n        )\n\n    def create_threads(self):\n        # Create mini-batch loader threads, each of which builds mini-batches\n        # and places them into a queue in CPU memory\n        self._workers = [\n            
threading.Thread(target=self.minibatch_loader_thread)\n            for _ in range(self._num_loaders)\n        ]\n\n        # Create one BlobsQueue per GPU\n        # (enqueue_blob_names are unscoped)\n        enqueue_blob_names = self.create_blobs_queues()\n\n        # Create one enqueuer thread per GPU\n        self._enqueuers = [\n            threading.Thread(\n                target=self.enqueue_blobs_thread,\n                args=(gpu_id, enqueue_blob_names)\n            ) for gpu_id in range(self._num_gpus)\n        ]\n\n    def start(self, prefill=False):\n        for w in self._workers + self._enqueuers:\n            w.setDaemon(True)\n            w.start()\n        if prefill:\n            logger.info('Pre-filling mini-batch queue...')\n            while not self._minibatch_queue.full():\n                logger.info(\n                    '  [{:d}/{:d}]'.format(\n                        self._minibatch_queue.qsize(),\n                        self._minibatch_queue.maxsize\n                    )\n                )\n                time.sleep(0.1)\n                # Detect failure and shutdown\n                if self.coordinator.should_stop():\n                    self.shutdown()\n                    break\n\n    def has_stopped(self):\n        return self.coordinator.should_stop()\n\n    def shutdown(self):\n        self.coordinator.request_stop()\n        self.coordinator.wait_for_stop()\n        self.close_blobs_queues()\n        for w in self._workers + self._enqueuers:\n            w.join()\n\n    def create_blobs_queues(self):\n        \"\"\"Create one BlobsQueue for each GPU to hold mini-batches.\"\"\"\n        for gpu_id in range(self._num_gpus):\n            with c2_utils.GpuNameScope(gpu_id):\n                workspace.RunOperatorOnce(\n                    core.CreateOperator(\n                        'CreateBlobsQueue', [], [self._blobs_queue_name],\n                        num_blobs=len(self.get_output_names()),\n                        
capacity=self._blobs_queue_capacity\n                    )\n                )\n        return self.create_enqueue_blobs()\n\n    def close_blobs_queues(self):\n        \"\"\"Close a BlobsQueue.\"\"\"\n        for gpu_id in range(self._num_gpus):\n            with core.NameScope('gpu_{}'.format(gpu_id)):\n                workspace.RunOperatorOnce(\n                    core.CreateOperator(\n                        'CloseBlobsQueue', [self._blobs_queue_name], []\n                    )\n                )\n\n    def create_enqueue_blobs(self):\n        blob_names = self.get_output_names()\n        enqueue_blob_names = [\n            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names\n        ]\n        for gpu_id in range(self._num_gpus):\n            with c2_utils.NamedCudaScope(gpu_id):\n                for blob in enqueue_blob_names:\n                    workspace.CreateBlob(core.ScopedName(blob))\n        return enqueue_blob_names\n\n    def register_sigint_handler(self):\n        def signal_handler(signal, frame):\n            logger.info(\n                'SIGINT: Shutting down RoIDataLoader threads and exiting...'\n            )\n            self.shutdown()\n\n        signal.signal(signal.SIGINT, signal_handler)\n"
  },
  {
    "path": "detectron/roi_data/mask_rcnn.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Construct minibatches for Mask R-CNN training. Handles the minibatch blobs\nthat are specific to Mask R-CNN. Other blobs that are generic to RPN or\nFast/er R-CNN are handled by their respecitive roi_data modules.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport logging\nimport numpy as np\n\nfrom detectron.core.config import cfg\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.boxes as box_utils\nimport detectron.utils.segms as segm_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):\n    \"\"\"Add Mask R-CNN specific blobs to the input blob dictionary.\"\"\"\n    # Prepare the mask targets by associating one gt mask to each training roi\n    # that has a fg (non-bg) class label.\n    M = cfg.MRCNN.RESOLUTION\n    polys_gt_inds = np.where(\n        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)\n    )[0]\n    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]\n    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)\n    fg_inds = np.where(blobs['labels_int32'] > 0)[0]\n    roi_has_mask = blobs['labels_int32'].copy()\n    
roi_has_mask[roi_has_mask > 0] = 1\n\n    if fg_inds.shape[0] > 0:\n        # Class labels for the foreground rois\n        mask_class_labels = blobs['labels_int32'][fg_inds]\n        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)\n\n        # Find overlap between all foreground rois and the bounding boxes\n        # enclosing each segmentation\n        rois_fg = sampled_boxes[fg_inds]\n        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(\n            rois_fg.astype(np.float32, copy=False),\n            boxes_from_polys.astype(np.float32, copy=False)\n        )\n        # Map from each fg rois to the index of the mask with highest overlap\n        # (measured by bbox overlap)\n        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)\n\n        # add fg targets\n        for i in range(rois_fg.shape[0]):\n            fg_polys_ind = fg_polys_inds[i]\n            poly_gt = polys_gt[fg_polys_ind]\n            roi_fg = rois_fg[i]\n            # Rasterize the portion of the polygon mask within the given fg roi\n            # to an M x M binary image\n            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)\n            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary\n            masks[i, :] = np.reshape(mask, M**2)\n    else:  # If there are no fg masks (it does happen)\n        # The network cannot handle empty blobs, so we must provide a mask\n        # We simply take the first bg roi, given it an all -1's mask (ignore\n        # label), and label it with class zero (bg).\n        bg_inds = np.where(blobs['labels_int32'] == 0)[0]\n        # rois_fg is actually one background roi, but that's ok because ...\n        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))\n        # We give it an -1's blob (ignore label)\n        masks = -blob_utils.ones((1, M**2), int32=True)\n        # We label it with class = 0 (background)\n        mask_class_labels = blob_utils.zeros((1, ))\n        # Mark that the first roi has 
a mask\n        roi_has_mask[0] = 1\n\n    if cfg.MRCNN.CLS_SPECIFIC_MASK:\n        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)\n\n    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)\n    rois_fg *= im_scale\n    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))\n    rois_fg = np.hstack((repeated_batch_idx, rois_fg))\n\n    # Update blobs dict with Mask R-CNN blobs\n    blobs['mask_rois'] = rois_fg\n    blobs['roi_has_mask_int32'] = roi_has_mask\n    blobs['masks_int32'] = masks\n\n\ndef _expand_to_class_specific_mask_targets(masks, mask_class_labels):\n    \"\"\"Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2)\n    to encode class specific mask targets.\n    \"\"\"\n    assert masks.shape[0] == mask_class_labels.shape[0]\n    M = cfg.MRCNN.RESOLUTION\n\n    # Target values of -1 are \"don't care\" / ignore labels\n    mask_targets = -blob_utils.ones(\n        (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True\n    )\n\n    for i in range(masks.shape[0]):\n        cls = int(mask_class_labels[i])\n        start = M**2 * cls\n        end = start + M**2\n        # Ignore background instance\n        # (only happens when there is no fg samples in an image)\n        if cls > 0:\n            mask_targets[i, start:end] = masks[i, :]\n\n    return mask_targets\n"
  },
  {
    "path": "detectron/roi_data/minibatch.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Construct minibatches for Detectron networks.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport cv2\nimport logging\nimport numpy as np\n\nfrom detectron.core.config import cfg\nimport detectron.roi_data.fast_rcnn as fast_rcnn_roi_data\nimport detectron.roi_data.retinanet as retinanet_roi_data\nimport detectron.roi_data.rpn as rpn_roi_data\nimport detectron.utils.blob as blob_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef get_minibatch_blob_names(is_training=True):\n    \"\"\"Return blob names in the order in which they are read by the data loader.\n    \"\"\"\n    # data blob: holds a batch of N images, each with 3 channels\n    blob_names = ['data']\n    if cfg.RPN.RPN_ON:\n        # RPN-only or end-to-end Faster R-CNN\n        blob_names += rpn_roi_data.get_rpn_blob_names(is_training=is_training)\n    elif cfg.RETINANET.RETINANET_ON:\n        blob_names += 
retinanet_roi_data.get_retinanet_blob_names(\n            is_training=is_training\n        )\n    else:\n        # Fast R-CNN like models trained on precomputed proposals\n        blob_names += fast_rcnn_roi_data.get_fast_rcnn_blob_names(\n            is_training=is_training\n        )\n    return blob_names\n\n\ndef get_minibatch(roidb):\n    \"\"\"Given a roidb, construct a minibatch sampled from it.\"\"\"\n    # We collect blobs from each image onto a list and then concat them into a\n    # single tensor, hence we initialize each blob to an empty list\n    blobs = {k: [] for k in get_minibatch_blob_names()}\n    # Get the input image blob, formatted for caffe2\n    im_blob, im_scales = _get_image_blob(roidb)\n    blobs['data'] = im_blob\n    if cfg.RPN.RPN_ON:\n        # RPN-only or end-to-end Faster/Mask R-CNN\n        valid = rpn_roi_data.add_rpn_blobs(blobs, im_scales, roidb)\n    elif cfg.RETINANET.RETINANET_ON:\n        im_width, im_height = im_blob.shape[3], im_blob.shape[2]\n        # im_width, im_height corresponds to the network input: padded image\n        # (if needed) width and height. 
We pass it as input and slice the data\n        # accordingly so that we don't need to use SampleAsOp\n        valid = retinanet_roi_data.add_retinanet_blobs(\n            blobs, im_scales, roidb, im_width, im_height\n        )\n    else:\n        # Fast R-CNN like models trained on precomputed proposals\n        valid = fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)\n    return blobs, valid\n\n\ndef _get_image_blob(roidb):\n    \"\"\"Builds an input blob from the images in the roidb at the specified\n    scales.\n    \"\"\"\n    num_images = len(roidb)\n    # Sample random scales to use for each image in this batch\n    scale_inds = np.random.randint(\n        0, high=len(cfg.TRAIN.SCALES), size=num_images\n    )\n    processed_ims = []\n    im_scales = []\n    for i in range(num_images):\n        im = cv2.imread(roidb[i]['image'])\n        assert im is not None, \\\n            'Failed to read image \\'{}\\''.format(roidb[i]['image'])\n        if roidb[i]['flipped']:\n            im = im[:, ::-1, :]\n        target_size = cfg.TRAIN.SCALES[scale_inds[i]]\n        im, im_scale = blob_utils.prep_im_for_blob(\n            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE\n        )\n        im_scales.append(im_scale)\n        processed_ims.append(im)\n\n    # Create a blob to hold the input images\n    blob = blob_utils.im_list_to_blob(processed_ims)\n\n    return blob, im_scales\n"
  },
  {
    "path": "detectron/roi_data/retinanet.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Compute minibatch blobs for training a RetinaNet network.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport logging\n\nimport detectron.utils.boxes as box_utils\nimport detectron.roi_data.data_utils as data_utils\nfrom detectron.core.config import cfg\n\n\nlogger = logging.getLogger(__name__)\n\n\ndef get_retinanet_blob_names(is_training=True):\n    \"\"\"\n    Returns blob names in the order in which they are read by the data\n    loader.\n\n    N = number of images per minibatch\n    A = number of anchors = num_scales * num_aspect_ratios\n        (for example 9 used in RetinaNet paper)\n    H, W = spatial dimensions (different for each FPN level)\n    M = Out of all the anchors generated, depending on the positive/negative IoU\n        overlap thresholds, we will have M positive anchors. 
These are the anchors\n        that bounding box branch will regress on.\n\n    retnet_cls_labels -> labels for the cls branch for each FPN level\n                         Shape: N x A x H x W\n\n    retnet_roi_bbox_targets -> targets for the bbox regression branch\n                               Shape: M x 4\n\n    retnet_roi_fg_bbox_locs -> for the bbox regression, since we are only\n                               interested in regressing on fg bboxes which are\n                               M in number and the output prediction of the network\n                               is of shape N x (A * 4) x H x W\n                               (in case of non class-specific bbox), so we\n                               store the locations of positive fg boxes in this\n                               blob retnet_roi_fg_bbox_locs of shape M x 4 where\n                               each row looks like: [img_id, anchor_id, x_loc, y_loc]\n    \"\"\"\n    # im_info: (height, width, image scale)\n    blob_names = ['im_info']\n    assert cfg.FPN.FPN_ON, \"RetinaNet uses FPN for dense detection\"\n    # Same format as RPN blobs, but one per FPN level\n    if is_training:\n        blob_names += ['retnet_fg_num', 'retnet_bg_num']\n        for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):\n            suffix = 'fpn{}'.format(lvl)\n            blob_names += [\n                'retnet_cls_labels_' + suffix,\n                'retnet_roi_bbox_targets_' + suffix,\n                'retnet_roi_fg_bbox_locs_' + suffix,\n            ]\n    return blob_names\n\n\ndef add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):\n    \"\"\"Add RetinaNet blobs.\"\"\"\n    # RetinaNet is applied to many feature levels, as in the FPN paper\n    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL\n    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE\n    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)\n    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS\n 
   anchor_scale = cfg.RETINANET.ANCHOR_SCALE\n\n    # get anchors from all levels for all scales/aspect ratios\n    foas = []\n    for lvl in range(k_min, k_max + 1):\n        stride = 2. ** lvl\n        for octave in range(scales_per_octave):\n            octave_scale = 2 ** (octave / float(scales_per_octave))\n            for idx in range(num_aspect_ratios):\n                anchor_sizes = (stride * octave_scale * anchor_scale, )\n                anchor_aspect_ratios = (aspect_ratios[idx], )\n                foa = data_utils.get_field_of_anchors(\n                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx)\n                foas.append(foa)\n    all_anchors = np.concatenate([f.field_of_anchors for f in foas])\n\n    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0\n    for im_i, entry in enumerate(roidb):\n        scale = im_scales[im_i]\n        im_height = np.round(entry['height'] * scale)\n        im_width = np.round(entry['width'] * scale)\n        gt_inds = np.where(\n            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]\n        assert len(gt_inds) > 0, \\\n            'Empty ground truth empty for image is not allowed. 
Please check.'\n\n        gt_rois = entry['boxes'][gt_inds, :] * scale\n        gt_classes = entry['gt_classes'][gt_inds]\n\n        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)\n        blobs['im_info'].append(im_info)\n\n        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(\n            foas, all_anchors, gt_rois, gt_classes, image_width, image_height)\n        for i, foa in enumerate(foas):\n            for k, v in retinanet_blobs[i].items():\n                # the way it stacks is:\n                # [[anchors for image1] + [anchors for image 2]]\n                level = int(np.log2(foa.stride))\n                key = '{}_fpn{}'.format(k, level)\n                if k == 'retnet_roi_fg_bbox_locs':\n                    v[:, 0] = im_i\n                    # loc_stride: 80 * 4 if cls_specific else 4\n                    loc_stride = 4  # 4 coordinates corresponding to bbox prediction\n                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:\n                        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)\n                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect\n                    # v[:, 1] is the class label [range 0-80] if we do\n                    # class-specific bbox otherwise it is 0. 
In case of class\n                    # specific, based on the label, the location of current\n                    # anchor is class_label * 4 and then we take into account\n                    # the anchor_ind if the anchors\n                    v[:, 1] *= 4\n                    v[:, 1] += loc_stride * anchor_ind\n                blobs[key].append(v)\n        blobs['retnet_fg_num'] += fg_num\n        blobs['retnet_bg_num'] += bg_num\n\n    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)\n    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)\n\n    N = len(roidb)\n    for k, v in blobs.items():\n        if isinstance(v, list) and len(v) > 0:\n            # compute number of anchors\n            A = int(len(v) / N)\n            # for the cls branch labels [per fpn level],\n            # we have blobs['retnet_cls_labels_fpn{}'] as a list until this step\n            # and length of this list is N x A where\n            # N = num_images, A = num_anchors for example, N = 2, A = 9\n            # Each element of the list has the shape 1 x 1 x H x W where H, W are\n            # spatial dimension of current fpn lvl. 
Let a{i} denote the element\n            # corresponding to anchor i [9 anchors total] in the list.\n            # The elements in the list are in order [[a0, ..., a9], [a0, ..., a9]]\n            # however the network will make predictions like 2 x (9 * 80) x H x W\n            # so we first concatenate the elements of each image to a numpy array\n            # and then concatenate the two images to get the 2 x 9 x H x W\n\n            if k.find('retnet_cls_labels') >= 0:\n                tmp = []\n                # concat anchors within an image\n                for i in range(0, len(v), A):\n                    tmp.append(np.concatenate(v[i: i + A], axis=1))\n                # concat images\n                blobs[k] = np.concatenate(tmp, axis=0)\n            else:\n                # for the bbox branch elements [per FPN level],\n                #  we have the targets and the fg boxes locations\n                # in the shape: M x 4 where M is the number of fg locations in a\n                # given image at the current FPN level. For the given level,\n                # the bbox predictions will be. 
The elements in the list are in\n                # order [[a0, ..., a9], [a0, ..., a9]]\n                # Concatenate them to form M x 4\n                blobs[k] = np.concatenate(v, axis=0)\n    return True\n\n\ndef _get_retinanet_blobs(\n        foas, all_anchors, gt_boxes, gt_classes, im_width, im_height):\n    total_anchors = all_anchors.shape[0]\n    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(\n        im_height, im_width))\n\n    inds_inside = np.arange(all_anchors.shape[0])\n    anchors = all_anchors\n    num_inside = len(inds_inside)\n\n    logger.debug('total_anchors: {}'.format(total_anchors))\n    logger.debug('inds_inside: {}'.format(num_inside))\n    logger.debug('anchors.shape: {}'.format(anchors.shape))\n\n    # Compute anchor labels:\n    # label=1 is positive, 0 is negative, -1 is don't care (ignore)\n    labels = np.empty((num_inside, ), dtype=np.float32)\n    labels.fill(-1)\n    if len(gt_boxes) > 0:\n        # Compute overlaps between the anchors and the gt boxes overlaps\n        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)\n        # Map from anchor to gt box that has highest overlap\n        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)\n        # For each anchor, amount of overlap with most overlapping gt box\n        anchor_to_gt_max = anchor_by_gt_overlap[\n            np.arange(num_inside), anchor_to_gt_argmax]\n\n        # Map from gt box to an anchor that has highest overlap\n        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)\n        # For each gt box, amount of overlap with most overlapping anchor\n        gt_to_anchor_max = anchor_by_gt_overlap[\n            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]\n        # Find all anchors that share the max overlap amount\n        # (this includes many ties)\n        anchors_with_max_overlap = np.where(\n            anchor_by_gt_overlap == gt_to_anchor_max)[0]\n\n        # Fg label: for each gt use 
anchors with highest overlap\n        # (including ties)\n        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]\n        labels[anchors_with_max_overlap] = gt_classes[gt_inds]\n        # Fg label: above threshold IOU\n        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP\n        gt_inds = anchor_to_gt_argmax[inds]\n        labels[inds] = gt_classes[gt_inds]\n\n    fg_inds = np.where(labels >= 1)[0]\n    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]\n    labels[bg_inds] = 0\n    num_fg, num_bg = len(fg_inds), len(bg_inds)\n\n    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)\n    bbox_targets[fg_inds, :] = data_utils.compute_targets(\n        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])\n\n    # Map up to original set of anchors\n    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)\n    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside, fill=0)\n\n    # Split the generated labels, etc. 
into labels per each field of anchors\n    blobs_out = []\n    start_idx = 0\n    for foa in foas:\n        H = foa.field_size\n        W = foa.field_size\n        end_idx = start_idx + H * W\n        _labels = labels[start_idx:end_idx]\n        _bbox_targets = bbox_targets[start_idx:end_idx, :]\n        start_idx = end_idx\n\n        # labels output with shape (1, height, width)\n        _labels = _labels.reshape((1, 1, H, W))\n        # bbox_targets output with shape (1, 4 * A, height, width)\n        _bbox_targets = _bbox_targets.reshape((1, H, W, 4)).transpose(0, 3, 1, 2)\n        stride = foa.stride\n        w = int(im_width / stride)\n        h = int(im_height / stride)\n\n        # data for select_smooth_l1 loss\n        num_classes = cfg.MODEL.NUM_CLASSES - 1\n        inds_4d = np.where(_labels > 0)\n        M = len(inds_4d)\n        _roi_bbox_targets = np.zeros((0, 4))\n        _roi_fg_bbox_locs = np.zeros((0, 4))\n        if M > 0:\n            im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]\n            _roi_bbox_targets = np.zeros((len(im_inds), 4))\n            _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))\n            lbls = _labels[im_inds, :, y, x]\n            for i, lbl in enumerate(lbls):\n                l = lbl[0] - 1\n                if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:\n                    l = 0\n                assert l >= 0 and l < num_classes, 'label out of the range'\n                _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]\n                _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]])\n        blobs_out.append(\n            dict(\n                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),\n                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),\n                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),\n            ))\n    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)\n    out_num_bg = (\n        np.array([num_bg + 1.0]) * 
(cfg.MODEL.NUM_CLASSES - 1) +\n        out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))\n    return blobs_out, out_num_fg, out_num_bg\n"
  },
  {
    "path": "detectron/roi_data/rpn.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Minibatch construction for Region Proposal Networks (RPN).\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport logging\nimport numpy as np\nimport numpy.random as npr\n\nfrom detectron.core.config import cfg\nimport detectron.roi_data.data_utils as data_utils\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.boxes as box_utils\n\nlogger = logging.getLogger(__name__)\n\n\ndef get_rpn_blob_names(is_training=True):\n    \"\"\"Blob names used by RPN.\"\"\"\n    # im_info: (height, width, image scale)\n    blob_names = ['im_info']\n    if is_training:\n        # gt boxes: (batch_idx, x1, y1, x2, y2, cls)\n        blob_names += ['roidb']\n        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:\n            # Same format as RPN blobs, but one per FPN level\n            for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):\n                blob_names += [\n                    'rpn_labels_int32_wide_fpn' + str(lvl),\n                    'rpn_bbox_targets_wide_fpn' + str(lvl),\n                    'rpn_bbox_inside_weights_wide_fpn' + str(lvl),\n                    'rpn_bbox_outside_weights_wide_fpn' + str(lvl)\n                ]\n  
      else:\n            # Single level RPN blobs\n            blob_names += [\n                'rpn_labels_int32_wide',\n                'rpn_bbox_targets_wide',\n                'rpn_bbox_inside_weights_wide',\n                'rpn_bbox_outside_weights_wide'\n            ]\n    return blob_names\n\n\ndef add_rpn_blobs(blobs, im_scales, roidb):\n    \"\"\"Add blobs needed training RPN-only and end-to-end Faster R-CNN models.\"\"\"\n    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:\n        # RPN applied to many feature levels, as in the FPN paper\n        k_max = cfg.FPN.RPN_MAX_LEVEL\n        k_min = cfg.FPN.RPN_MIN_LEVEL\n        foas = []\n        for lvl in range(k_min, k_max + 1):\n            field_stride = 2.**lvl\n            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )\n            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS\n            foa = data_utils.get_field_of_anchors(\n                field_stride, anchor_sizes, anchor_aspect_ratios\n            )\n            foas.append(foa)\n        all_anchors = np.concatenate([f.field_of_anchors for f in foas])\n    else:\n        foa = data_utils.get_field_of_anchors(\n            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS\n        )\n        all_anchors = foa.field_of_anchors\n\n    for im_i, entry in enumerate(roidb):\n        scale = im_scales[im_i]\n        im_height = np.round(entry['height'] * scale)\n        im_width = np.round(entry['width'] * scale)\n        gt_inds = np.where(\n            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)\n        )[0]\n        gt_rois = entry['boxes'][gt_inds, :] * scale\n        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)\n        blobs['im_info'].append(im_info)\n\n        # Add RPN targets\n        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:\n            # RPN applied to many feature levels, as in the FPN paper\n            rpn_blobs = _get_rpn_blobs(\n                im_height, im_width, 
foas, all_anchors, gt_rois\n            )\n            for i, lvl in enumerate(range(k_min, k_max + 1)):\n                for k, v in rpn_blobs[i].items():\n                    blobs[k + '_fpn' + str(lvl)].append(v)\n        else:\n            # Classical RPN, applied to a single feature level\n            rpn_blobs = _get_rpn_blobs(\n                im_height, im_width, [foa], all_anchors, gt_rois\n            )\n            for k, v in rpn_blobs.items():\n                blobs[k].append(v)\n\n    for k, v in blobs.items():\n        if isinstance(v, list) and len(v) > 0:\n            blobs[k] = np.concatenate(v)\n\n    valid_keys = [\n        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',\n        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'\n    ]\n    minimal_roidb = [{} for _ in range(len(roidb))]\n    for i, e in enumerate(roidb):\n        for k in valid_keys:\n            if k in e:\n                minimal_roidb[i][k] = e[k]\n    blobs['roidb'] = blob_utils.serialize(minimal_roidb)\n\n    # Always return valid=True, since RPN minibatches are valid by design\n    return True\n\n\ndef _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):\n    total_anchors = all_anchors.shape[0]\n    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH\n\n    if straddle_thresh >= 0:\n        # Only keep anchors inside the image by a margin of straddle_thresh\n        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all\n        # anchors\n        inds_inside = np.where(\n            (all_anchors[:, 0] >= -straddle_thresh) &\n            (all_anchors[:, 1] >= -straddle_thresh) &\n            (all_anchors[:, 2] < im_width + straddle_thresh) &\n            (all_anchors[:, 3] < im_height + straddle_thresh)\n        )[0]\n        # keep only inside anchors\n        anchors = all_anchors[inds_inside, :]\n    else:\n        inds_inside = np.arange(all_anchors.shape[0])\n        anchors = all_anchors\n    num_inside = 
len(inds_inside)\n\n    logger.debug('total_anchors: {}'.format(total_anchors))\n    logger.debug('inds_inside: {}'.format(num_inside))\n    logger.debug('anchors.shape: {}'.format(anchors.shape))\n\n    # Compute anchor labels:\n    # label=1 is positive, 0 is negative, -1 is don't care (ignore)\n    labels = np.empty((num_inside, ), dtype=np.int32)\n    labels.fill(-1)\n    if len(gt_boxes) > 0:\n        # Compute overlaps between the anchors and the gt boxes overlaps\n        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)\n        # Map from anchor to gt box that has highest overlap\n        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)\n        # For each anchor, amount of overlap with most overlapping gt box\n        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),\n                                                anchor_to_gt_argmax]\n\n        # Map from gt box to an anchor that has highest overlap\n        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)\n        # For each gt box, amount of overlap with most overlapping anchor\n        gt_to_anchor_max = anchor_by_gt_overlap[\n            gt_to_anchor_argmax,\n            np.arange(anchor_by_gt_overlap.shape[1])\n        ]\n        # Find all anchors that share the max overlap amount\n        # (this includes many ties)\n        anchors_with_max_overlap = np.where(\n            anchor_by_gt_overlap == gt_to_anchor_max\n        )[0]\n\n        # Fg label: for each gt use anchors with highest overlap\n        # (including ties)\n        labels[anchors_with_max_overlap] = 1\n        # Fg label: above threshold IOU\n        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1\n\n    # subsample positive labels if we have too many\n    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)\n    fg_inds = np.where(labels == 1)[0]\n    if len(fg_inds) > num_fg:\n        disable_inds = npr.choice(\n            fg_inds, 
size=(len(fg_inds) - num_fg), replace=False\n        )\n        labels[disable_inds] = -1\n    fg_inds = np.where(labels == 1)[0]\n\n    # subsample negative labels if we have too many\n    # (samples with replacement, but since the set of bg inds is large most\n    # samples will not have repeats)\n    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)\n    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]\n    if len(bg_inds) > num_bg:\n        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]\n    else:\n        enable_inds = bg_inds\n\n    labels[enable_inds] = 0\n    bg_inds = np.where(labels == 0)[0]\n\n    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)\n    bbox_targets[fg_inds, :] = data_utils.compute_targets(\n        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]\n    )\n\n    # Bbox regression loss has the form:\n    #   loss(x) = weight_outside * L(weight_inside * x)\n    # Inside weights allow us to set zero loss on an element-wise basis\n    # Bbox regression is only trained on positive examples so we set their\n    # weights to 1.0 (or otherwise if config is different) and 0 otherwise\n    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)\n    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)\n\n    # The bbox regression loss only averages by the number of images in the\n    # mini-batch, whereas we need to average by the total number of example\n    # anchors selected\n    # Outside weights are used to scale each element-wise loss so the final\n    # average over the mini-batch is correct\n    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)\n    # uniform weighting of examples (given non-uniform sampling)\n    num_examples = np.sum(labels >= 0)\n    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples\n    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples\n\n    # Map up to original set of anchors\n    labels = 
data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)\n    bbox_targets = data_utils.unmap(\n        bbox_targets, total_anchors, inds_inside, fill=0\n    )\n    bbox_inside_weights = data_utils.unmap(\n        bbox_inside_weights, total_anchors, inds_inside, fill=0\n    )\n    bbox_outside_weights = data_utils.unmap(\n        bbox_outside_weights, total_anchors, inds_inside, fill=0\n    )\n\n    # Split the generated labels, etc. into labels per each field of anchors\n    blobs_out = []\n    start_idx = 0\n    for foa in foas:\n        H = foa.field_size\n        W = foa.field_size\n        A = foa.num_cell_anchors\n        end_idx = start_idx + H * W * A\n        _labels = labels[start_idx:end_idx]\n        _bbox_targets = bbox_targets[start_idx:end_idx, :]\n        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]\n        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]\n        start_idx = end_idx\n\n        # labels output with shape (1, A, height, width)\n        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)\n        # bbox_targets output with shape (1, 4 * A, height, width)\n        _bbox_targets = _bbox_targets.reshape(\n            (1, H, W, A * 4)).transpose(0, 3, 1, 2)\n        # bbox_inside_weights output with shape (1, 4 * A, height, width)\n        _bbox_inside_weights = _bbox_inside_weights.reshape(\n            (1, H, W, A * 4)).transpose(0, 3, 1, 2)\n        # bbox_outside_weights output with shape (1, 4 * A, height, width)\n        _bbox_outside_weights = _bbox_outside_weights.reshape(\n            (1, H, W, A * 4)).transpose(0, 3, 1, 2)\n        blobs_out.append(\n            dict(\n                rpn_labels_int32_wide=_labels,\n                rpn_bbox_targets_wide=_bbox_targets,\n                rpn_bbox_inside_weights_wide=_bbox_inside_weights,\n                rpn_bbox_outside_weights_wide=_bbox_outside_weights\n            )\n        )\n    return blobs_out[0] if len(blobs_out) == 1 
else blobs_out\n"
  },
  {
    "path": "detectron/tests/data_loader_benchmark.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# Example usage:\n# data_loader_benchmark.par \\\n#   NUM_GPUS 2 \\\n#   TRAIN.DATASETS \"('voc_2007_trainval',)\" \\\n#   TRAIN.PROPOSAL_FILES /path/to/voc_2007_trainval/proposals.pkl \\\n#   DATA_LOADER.NUM_THREADS 4 \\\n#   DATA_LOADER.MINIBATCH_QUEUE_SIZE 64 \\\n#   DATA_LOADER.BLOBS_QUEUE_CAPACITY 8\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport logging\nimport numpy as np\nimport pprint\nimport sys\nimport time\n\nfrom caffe2.python import core\nfrom caffe2.python import muji\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import assert_and_infer_cfg\nfrom detectron.core.config import cfg\nfrom detectron.core.config import merge_cfg_from_file\nfrom detectron.core.config import merge_cfg_from_list\nfrom detectron.datasets.roidb import combined_roidb_for_training\nfrom detectron.roi_data.loader import RoIDataLoader\nfrom detectron.utils.logging import setup_logging\nfrom detectron.utils.timer import Timer\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        '--num-batches', dest='num_batches',\n        help='Number of minibatches to run',\n        default=200, type=int)\n   
 parser.add_argument(\n        '--sleep', dest='sleep_time',\n        help='Seconds sleep to emulate a network running',\n        default=0.1, type=float)\n    parser.add_argument(\n        '--cfg', dest='cfg_file', help='optional config file', default=None,\n        type=str)\n    parser.add_argument(\n        '--x-factor', dest='x_factor', help='simulates x-factor more GPUs',\n        default=1, type=int)\n    parser.add_argument(\n        '--profiler', dest='profiler', help='profile minibatch load time',\n        action='store_true')\n    parser.add_argument(\n        'opts', help='See detectron/core/config.py for all options', default=None,\n        nargs=argparse.REMAINDER)\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    args = parser.parse_args()\n    return args\n\n\ndef loader_loop(roi_data_loader):\n    load_timer = Timer()\n    iters = 100\n    for i in range(iters):\n        load_timer.tic()\n        roi_data_loader.get_next_minibatch()\n        load_timer.toc()\n        print('{:d}/{:d}: Average get_next_minibatch time: {:.3f}s'.format(\n              i + 1, iters, load_timer.average_time))\n\n\ndef main(opts):\n    logger = logging.getLogger(__name__)\n    roidb = combined_roidb_for_training(\n        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)\n    logger.info('{:d} roidb entries'.format(len(roidb)))\n    roi_data_loader = RoIDataLoader(\n        roidb,\n        num_loaders=cfg.DATA_LOADER.NUM_THREADS,\n        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,\n        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY\n    )\n    blob_names = roi_data_loader.get_output_names()\n\n    net = core.Net('dequeue_net')\n    net.type = 'dag'\n    all_blobs = []\n    for gpu_id in range(cfg.NUM_GPUS):\n        with core.NameScope('gpu_{}'.format(gpu_id)):\n            with core.DeviceScope(muji.OnGPU(gpu_id)):\n                for blob_name in blob_names:\n                    blob = 
core.ScopedName(blob_name)\n                    all_blobs.append(blob)\n                    workspace.CreateBlob(blob)\n                    logger.info('Creating blob: {}'.format(blob))\n                net.DequeueBlobs(\n                    roi_data_loader._blobs_queue_name, blob_names)\n    logger.info(\"Protobuf:\\n\" + str(net.Proto()))\n\n    if opts.profiler:\n        import cProfile\n        cProfile.runctx(\n            'loader_loop(roi_data_loader)', globals(), locals(),\n            sort='cumulative')\n    else:\n        loader_loop(roi_data_loader)\n\n    roi_data_loader.register_sigint_handler()\n    roi_data_loader.start(prefill=True)\n    total_time = 0\n    for i in range(opts.num_batches):\n        start_t = time.time()\n        for _ in range(opts.x_factor):\n            workspace.RunNetOnce(net)\n        total_time += (time.time() - start_t) / opts.x_factor\n        logger.info(\n            '{:d}/{:d}: Averge dequeue time: {:.3f}s  [{:d}/{:d}]'.format(\n                i + 1, opts.num_batches, total_time / (i + 1),\n                roi_data_loader._minibatch_queue.qsize(),\n                cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE\n            )\n        )\n        # Sleep to simulate the time taken by running a little network\n        time.sleep(opts.sleep_time)\n        # To inspect:\n        # blobs = workspace.FetchBlobs(all_blobs)\n        # from IPython import embed; embed()\n    logger.info('Shutting down data loader...')\n    roi_data_loader.shutdown()\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])\n    logger = setup_logging(__name__)\n    logger.setLevel(logging.DEBUG)\n    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)\n    np.random.seed(cfg.RNG_SEED)\n    args = parse_args()\n    logger.info('Called with args:')\n    logger.info(args)\n    if args.cfg_file is not None:\n        merge_cfg_from_file(args.cfg_file)\n    if args.opts is not None:\n        
merge_cfg_from_list(args.opts)\n    assert_and_infer_cfg()\n    logger.info('Running with config:')\n    logger.info(pprint.pformat(cfg))\n    main(args)\n"
  },
  {
    "path": "detectron/tests/test_batch_permutation_op.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport unittest\n\nfrom caffe2.proto import caffe2_pb2\nfrom caffe2.python import core\nfrom caffe2.python import gradient_checker\nfrom caffe2.python import workspace\n\nimport detectron.utils.logging as logging_utils\nimport detectron.utils.c2 as c2_utils\n\n\nclass BatchPermutationOpTest(unittest.TestCase):\n    def _run_op_test(self, X, I, check_grad=False):\n        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):\n            op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y'])\n            workspace.FeedBlob('X', X)\n            workspace.FeedBlob('I', I)\n        workspace.RunOperatorOnce(op)\n        Y = workspace.FetchBlob('Y')\n\n        if check_grad:\n            gc = gradient_checker.GradientChecker(\n                stepsize=0.1,\n                threshold=0.001,\n                device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)\n            )\n\n            res, grad, grad_estimated = gc.CheckSimple(op, [X, I], 0, [0])\n            self.assertTrue(res, 'Grad check failed')\n\n        Y_ref = X[I]\n        np.testing.assert_allclose(Y, Y_ref, rtol=1e-5, 
atol=1e-08)\n\n    def _run_speed_test(self, iters=5, N=1024):\n        \"\"\"This function provides an example of how to benchmark custom\n        operators using the Caffe2 'prof_dag' network execution type. Please\n        note that for 'prof_dag' to work, Caffe2 must be compiled with profiling\n        support using the `-DUSE_PROF=ON` option passed to `cmake` when building\n        Caffe2.\n        \"\"\"\n        net = core.Net('test')\n        net.Proto().type = 'prof_dag'\n        net.Proto().num_workers = 2\n        Y = net.BatchPermutation(['X', 'I'], 'Y')\n        Y_flat = net.FlattenToVec([Y], 'Y_flat')\n        loss = net.AveragedLoss([Y_flat], 'loss')\n        net.AddGradientOperators([loss])\n        workspace.CreateNet(net)\n\n        X = np.random.randn(N, 256, 14, 14)\n        for _i in range(iters):\n            I = np.random.permutation(N)\n            workspace.FeedBlob('X', X.astype(np.float32))\n            workspace.FeedBlob('I', I.astype(np.int32))\n            workspace.RunNet(net.Proto().name)\n            np.testing.assert_allclose(\n                workspace.FetchBlob('Y'), X[I], rtol=1e-5, atol=1e-08\n            )\n\n    def test_forward_and_gradient(self):\n        A = np.random.randn(2, 3, 5, 7).astype(np.float32)\n        I = np.array([0, 1], dtype=np.int32)\n        self._run_op_test(A, I, check_grad=True)\n\n        A = np.random.randn(2, 3, 5, 7).astype(np.float32)\n        I = np.array([1, 0], dtype=np.int32)\n        self._run_op_test(A, I, check_grad=True)\n\n        A = np.random.randn(10, 3, 5, 7).astype(np.float32)\n        I = np.array(np.random.permutation(10), dtype=np.int32)\n        self._run_op_test(A, I, check_grad=True)\n\n    def test_size_exceptions(self):\n        A = np.random.randn(2, 256, 42, 86).astype(np.float32)\n        I = np.array(np.random.permutation(10), dtype=np.int32)\n        with self.assertRaises(RuntimeError):\n            self._run_op_test(A, I)\n\n    # See doc string in _run_speed_test\n    
# def test_perf(self):\n    #     with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):\n    #         self._run_speed_test()\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])\n    c2_utils.import_detectron_ops()\n    assert 'BatchPermutation' in workspace.RegisteredOperators()\n    logging_utils.setup_logging(__name__)\n    unittest.main()\n"
  },
  {
    "path": "detectron/tests/test_bbox_transform.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport unittest\n\nfrom pycocotools import mask as COCOmask\n\nimport detectron.utils.boxes as box_utils\n\n\ndef random_boxes(mean_box, stdev, N):\n    boxes = np.random.randn(N, 4) * stdev + mean_box\n    return boxes.astype(dtype=np.float32)\n\n\nclass TestBboxTransform(unittest.TestCase):\n    def test_bbox_transform_and_inverse(self):\n        weights = (5, 5, 10, 10)\n        src_boxes = random_boxes([10, 10, 20, 20], 1, 10)\n        dst_boxes = random_boxes([10, 10, 20, 20], 1, 10)\n        deltas = box_utils.bbox_transform_inv(\n            src_boxes, dst_boxes, weights=weights\n        )\n        dst_boxes_reconstructed = box_utils.bbox_transform(\n            src_boxes, deltas, weights=weights\n        )\n        np.testing.assert_array_almost_equal(\n            dst_boxes, dst_boxes_reconstructed, decimal=5\n        )\n\n    def test_bbox_dataset_to_prediction_roundtrip(self):\n        \"\"\"Simulate the process of reading a ground-truth box from a dataset,\n        make predictions from proposals, convert the predictions back to the\n        dataset format, and then use the COCO API to 
compute IoU overlap between\n        the gt box and the predictions. These should have IoU of 1.\n        \"\"\"\n        weights = (5, 5, 10, 10)\n        # 1/ \"read\" a box from a dataset in the default (x1, y1, w, h) format\n        gt_xywh_box = [10, 20, 100, 150]\n        # 2/ convert it to our internal (x1, y1, x2, y2) format\n        gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box)\n        # 3/ consider nearby proposal boxes\n        prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10)\n        # 4/ compute proposal-to-gt transformation deltas\n        deltas = box_utils.bbox_transform_inv(\n            prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights\n        )\n        # 5/ use deltas to transform proposals to xyxy predicted box\n        pred_xyxy_boxes = box_utils.bbox_transform(\n            prop_xyxy_boxes, deltas, weights=weights\n        )\n        # 6/ convert xyxy predicted box to xywh predicted box\n        pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes)\n        # 7/ use COCO API to compute IoU\n        not_crowd = [int(False)] * pred_xywh_boxes.shape[0]\n        ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd)\n        np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))\n\n    def test_cython_bbox_iou_against_coco_api_bbox_iou(self):\n        \"\"\"Check that our cython implementation of bounding box IoU overlap\n        matches the COCO API implementation.\n        \"\"\"\n        def _do_test(b1, b2):\n            # Compute IoU overlap with the cython implementation\n            cython_iou = box_utils.bbox_overlaps(b1, b2)\n            # Compute IoU overlap with the COCO API implementation\n            # (requires converting boxes from xyxy to xywh format)\n            xywh_b1 = box_utils.xyxy_to_xywh(b1)\n            xywh_b2 = box_utils.xyxy_to_xywh(b2)\n            not_crowd = [int(False)] * b2.shape[0]\n            coco_ious = COCOmask.iou(xywh_b1, xywh_b2, not_crowd)\n            # 
IoUs should be similar\n            np.testing.assert_array_almost_equal(\n                cython_iou, coco_ious, decimal=5\n            )\n\n        # Test small boxes\n        b1 = random_boxes([10, 10, 20, 20], 5, 10)\n        b2 = random_boxes([10, 10, 20, 20], 5, 10)\n        _do_test(b1, b2)\n\n        # Test bigger boxes\n        b1 = random_boxes([10, 10, 110, 20], 20, 10)\n        b2 = random_boxes([10, 10, 110, 20], 20, 10)\n        _do_test(b1, b2)\n\n\nif __name__ == '__main__':\n    unittest.main()\n"
  },
  {
    "path": "detectron/tests/test_cfg.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport copy\nimport tempfile\nimport unittest\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.collections import AttrDict\nimport detectron.core.config as core_config\nimport detectron.utils.env as envu\nimport detectron.utils.logging as logging_utils\n\n\nclass TestAttrDict(unittest.TestCase):\n    def test_immutability(self):\n        # Top level immutable\n        a = AttrDict()\n        a.foo = 0\n        a.immutable(True)\n        with self.assertRaises(AttributeError):\n            a.foo = 1\n            a.bar = 1\n        assert a.is_immutable()\n        assert a.foo == 0\n        a.immutable(False)\n        assert not a.is_immutable()\n        a.foo = 1\n        assert a.foo == 1\n\n        # Recursively immutable\n        a.level1 = AttrDict()\n        a.level1.foo = 0\n        a.level1.level2 = AttrDict()\n        a.level1.level2.foo = 0\n        a.immutable(True)\n        assert a.is_immutable()\n        with self.assertRaises(AttributeError):\n            a.level1.level2.foo = 1\n            a.level1.bar = 1\n        assert a.level1.level2.foo == 0\n\n        # Serialize immutability state\n    
    a.immutable(True)\n        a2 = core_config.load_cfg(envu.yaml_dump(a))\n        assert a.is_immutable()\n        assert a2.is_immutable()\n\n\nclass TestCfg(unittest.TestCase):\n    def test_copy_cfg(self):\n        cfg2 = copy.deepcopy(cfg)\n        s = cfg.MODEL.TYPE\n        cfg2.MODEL.TYPE = 'dummy'\n        assert cfg.MODEL.TYPE == s\n\n    def test_merge_cfg_from_cfg(self):\n        # Test: merge from deepcopy\n        s = 'dummy0'\n        cfg2 = copy.deepcopy(cfg)\n        cfg2.MODEL.TYPE = s\n        core_config.merge_cfg_from_cfg(cfg2)\n        assert cfg.MODEL.TYPE == s\n\n        # Test: merge from yaml\n        s = 'dummy1'\n        cfg2 = core_config.load_cfg(envu.yaml_dump(cfg))\n        cfg2.MODEL.TYPE = s\n        core_config.merge_cfg_from_cfg(cfg2)\n        assert cfg.MODEL.TYPE == s\n\n        # Test: merge with a valid key\n        s = 'dummy2'\n        cfg2 = AttrDict()\n        cfg2.MODEL = AttrDict()\n        cfg2.MODEL.TYPE = s\n        core_config.merge_cfg_from_cfg(cfg2)\n        assert cfg.MODEL.TYPE == s\n\n        # Test: merge with an invalid key\n        s = 'dummy3'\n        cfg2 = AttrDict()\n        cfg2.FOO = AttrDict()\n        cfg2.FOO.BAR = s\n        with self.assertRaises(KeyError):\n            core_config.merge_cfg_from_cfg(cfg2)\n\n        # Test: merge with converted type\n        cfg2 = AttrDict()\n        cfg2.TRAIN = AttrDict()\n        cfg2.TRAIN.SCALES = [1]\n        core_config.merge_cfg_from_cfg(cfg2)\n        assert type(cfg.TRAIN.SCALES) is tuple\n        assert cfg.TRAIN.SCALES[0] == 1\n\n        # Test: merge with invalid type\n        cfg2 = AttrDict()\n        cfg2.TRAIN = AttrDict()\n        cfg2.TRAIN.SCALES = 1\n        with self.assertRaises(ValueError):\n            core_config.merge_cfg_from_cfg(cfg2)\n\n    def test_merge_cfg_from_file(self):\n        with tempfile.NamedTemporaryFile() as f:\n            envu.yaml_dump(cfg, f)\n            s = cfg.MODEL.TYPE\n            cfg.MODEL.TYPE = 
'dummy'\n            assert cfg.MODEL.TYPE != s\n            core_config.merge_cfg_from_file(f.name)\n            assert cfg.MODEL.TYPE == s\n\n    def test_merge_cfg_from_list(self):\n        opts = [\n            'TRAIN.SCALES', '(100, )', 'MODEL.TYPE', u'foobar', 'NUM_GPUS', 2\n        ]\n        assert len(cfg.TRAIN.SCALES) > 0\n        assert cfg.TRAIN.SCALES[0] != 100\n        assert cfg.MODEL.TYPE != 'foobar'\n        assert cfg.NUM_GPUS != 2\n        core_config.merge_cfg_from_list(opts)\n        assert type(cfg.TRAIN.SCALES) is tuple\n        assert len(cfg.TRAIN.SCALES) == 1\n        assert cfg.TRAIN.SCALES[0] == 100\n        assert cfg.MODEL.TYPE == 'foobar'\n        assert cfg.NUM_GPUS == 2\n\n    def test_deprecated_key_from_list(self):\n        # You should see logger messages like:\n        #   \"Deprecated config key (ignoring): MODEL.DILATION\"\n        opts = ['FINAL_MSG', 'foobar', 'MODEL.DILATION', 2]\n        with self.assertRaises(AttributeError):\n            _ = cfg.FINAL_MSG  # noqa\n        with self.assertRaises(AttributeError):\n            _ = cfg.MODEL.DILATION  # noqa\n        core_config.merge_cfg_from_list(opts)\n        with self.assertRaises(AttributeError):\n            _ = cfg.FINAL_MSG  # noqa\n        with self.assertRaises(AttributeError):\n            _ = cfg.MODEL.DILATION  # noqa\n\n    def test_deprecated_key_from_file(self):\n        # You should see logger messages like:\n        #   \"Deprecated config key (ignoring): MODEL.DILATION\"\n        with tempfile.NamedTemporaryFile() as f:\n            cfg2 = copy.deepcopy(cfg)\n            cfg2.MODEL.DILATION = 2\n            envu.yaml_dump(cfg2, f)\n            with self.assertRaises(AttributeError):\n                _ = cfg.MODEL.DILATION  # noqa\n            core_config.merge_cfg_from_file(f.name)\n            with self.assertRaises(AttributeError):\n                _ = cfg.MODEL.DILATION  # noqa\n\n    def test_renamed_key_from_list(self):\n        # You should see 
logger messages like:\n        #  \"Key EXAMPLE.RENAMED.KEY was renamed to EXAMPLE.KEY;\n        #  please update your config\"\n        opts = ['EXAMPLE.RENAMED.KEY', 'foobar']\n        with self.assertRaises(AttributeError):\n            _ = cfg.EXAMPLE.RENAMED.KEY  # noqa\n        with self.assertRaises(KeyError):\n            core_config.merge_cfg_from_list(opts)\n\n    def test_renamed_key_from_file(self):\n        # You should see logger messages like:\n        #  \"Key EXAMPLE.RENAMED.KEY was renamed to EXAMPLE.KEY;\n        #  please update your config\"\n        with tempfile.NamedTemporaryFile() as f:\n            cfg2 = copy.deepcopy(cfg)\n            cfg2.EXAMPLE = AttrDict()\n            cfg2.EXAMPLE.RENAMED = AttrDict()\n            cfg2.EXAMPLE.RENAMED.KEY = 'foobar'\n            envu.yaml_dump(cfg2, f)\n            with self.assertRaises(AttributeError):\n                _ = cfg.EXAMPLE.RENAMED.KEY  # noqa\n            with self.assertRaises(KeyError):\n                core_config.merge_cfg_from_file(f.name)\n\n\nif __name__ == '__main__':\n    logging_utils.setup_logging(__name__)\n    unittest.main()\n"
  },
  {
    "path": "detectron/tests/test_loader.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport logging\nimport unittest\nimport unittest.mock as mock\n\nfrom caffe2.proto import caffe2_pb2\nfrom caffe2.python import core\nfrom caffe2.python import muji\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import assert_and_infer_cfg\nfrom detectron.core.config import cfg\nfrom detectron.roi_data.loader import RoIDataLoader\nimport detectron.utils.logging as logging_utils\n\n\ndef get_roidb_blobs(roidb):\n    blobs = {}\n    blobs['data'] = np.stack([entry['data'] for entry in roidb])\n    return blobs, True\n\n\ndef get_net(data_loader, name):\n    logger = logging.getLogger(__name__)\n    blob_names = data_loader.get_output_names()\n    net = core.Net(name)\n    net.type = 'dag'\n    for gpu_id in range(cfg.NUM_GPUS):\n        with core.NameScope('gpu_{}'.format(gpu_id)):\n            with core.DeviceScope(muji.OnGPU(gpu_id)):\n                for blob_name in blob_names:\n                    blob = core.ScopedName(blob_name)\n                    workspace.CreateBlob(blob)\n                net.DequeueBlobs(\n                    data_loader._blobs_queue_name, blob_names)\n   
 logger.info(\"Protobuf:\\n\" + str(net.Proto()))\n\n    return net\n\n\ndef get_roidb_sample_data(sample_data):\n    roidb = []\n    for _ in range(np.random.randint(4, 10)):\n        roidb.append({'data': sample_data})\n    return roidb\n\n\ndef create_loader_and_network(sample_data, name):\n    roidb = get_roidb_sample_data(sample_data)\n    loader = RoIDataLoader(roidb)\n    net = get_net(loader, 'dequeue_net_train')\n    loader.register_sigint_handler()\n    loader.start(prefill=False)\n    return loader, net\n\n\ndef run_net(net):\n    workspace.RunNetOnce(net)\n    gpu_dev = core.DeviceOption(caffe2_pb2.CUDA, 0)\n    name_scope = 'gpu_{}'.format(0)\n    with core.NameScope(name_scope):\n        with core.DeviceScope(gpu_dev):\n            data = workspace.FetchBlob(core.ScopedName('data'))\n            return data\n\n\nclass TestRoIDataLoader(unittest.TestCase):\n    @mock.patch(\n        'detectron.roi_data.loader.get_minibatch_blob_names',\n        return_value=[u'data']\n    )\n    @mock.patch(\n        'detectron.roi_data.loader.get_minibatch',\n        side_effect=get_roidb_blobs\n    )\n    def test_two_parallel_loaders(self, _1, _2):\n        train_data = np.random.rand(2, 3, 3).astype(np.float32)\n        train_loader, train_net = create_loader_and_network(train_data,\n                                                            'dequeue_net_train')\n        test_data = np.random.rand(2, 4, 4).astype(np.float32)\n        test_loader, test_net = create_loader_and_network(test_data,\n                                                          'dequeue_net_test')\n        for _ in range(5):\n            data = run_net(train_net)\n            self.assertEqual(data[0].tolist(), train_data.tolist())\n            data = run_net(test_net)\n            self.assertEqual(data[0].tolist(), test_data.tolist())\n        test_loader.shutdown()\n        train_loader.shutdown()\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', 
'--caffe2_log_level=0'])\n    logger = logging_utils.setup_logging(__name__)\n    logger.setLevel(logging.DEBUG)\n    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)\n    np.random.seed(cfg.RNG_SEED)\n    cfg.TRAIN.ASPECT_GROUPING = False\n    cfg.NUM_GPUS = 2\n    assert_and_infer_cfg()\n    unittest.main()\n"
  },
  {
    "path": "detectron/tests/test_restore_checkpoint.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport logging\nimport numpy as np\nimport os\nimport shutil\nimport tempfile\n\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import assert_and_infer_cfg\nfrom detectron.core.config import cfg\nfrom detectron.core.config import get_output_dir\nfrom detectron.datasets.roidb import combined_roidb_for_training\nfrom detectron.modeling import model_builder\nfrom detectron.utils.logging import setup_logging\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.net as nu\n\nc2_utils.import_detectron_ops()\n\n\ndef get_params(model):\n    blobs = {}  # gpu_0 blobs with unscoped_name as key\n    all_blobs = {}  # all blobs with scoped name as key\n    # Save all parameters\n    for param in model.params:\n        scoped_name = str(param)\n        unscoped_name = c2_utils.UnscopeName(scoped_name)\n        if 'gpu_0' in scoped_name:\n            blobs[unscoped_name] = workspace.FetchBlob(scoped_name)\n        all_blobs[scoped_name] = workspace.FetchBlob(scoped_name)\n    for param in model.TrainableParams():\n        scoped_name = str(param) + '_momentum'\n        unscoped_name = 
c2_utils.UnscopeName(scoped_name)\n        if 'gpu_0' in scoped_name:\n            blobs[unscoped_name] = workspace.FetchBlob(scoped_name)\n        all_blobs[scoped_name] = workspace.FetchBlob(scoped_name)\n    return blobs, all_blobs\n\n\ndef add_momentum_init_ops(model):\n    for param in model.TrainableParams(gpu_id=0):\n        model.param_init_net.GaussianFill(\n            [param + '_momentum'], param + '_momentum', mean=0.0, std=1.0)\n\n\ndef init_weights(model):\n    # init weights in gpu_id = 0 and then broadcast\n    workspace.RunNetOnce(model.param_init_net)\n    nu.broadcast_parameters(model)\n\n\ndef test_restore_checkpoint():\n    # Create Model\n    model = model_builder.create(cfg.MODEL.TYPE, train=True)\n    add_momentum_init_ops(model)\n    init_weights(model)\n    # Fill input blobs\n    roidb = combined_roidb_for_training(\n        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES\n    )\n    model_builder.add_training_inputs(model, roidb=roidb)\n    workspace.CreateNet(model.net)\n    # Bookkeeping for checkpoint creation\n    iter_num = 0\n    checkpoints = {}\n    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)\n    chk_file_path = os.path.join(output_dir, 'model_iter{}.pkl'.format(iter_num))\n    checkpoints[iter_num] = chk_file_path\n    # Save model weights\n    nu.save_model_to_weights_file(checkpoints[iter_num], model)\n    orig_gpu_0_params, orig_all_params = get_params(model)\n    # Change the model weights\n    init_weights(model)\n    # Reload the weights in the model\n    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)\n    nu.broadcast_parameters(model)\n    shutil.rmtree(cfg.OUTPUT_DIR)\n    _, restored_all_params = get_params(model)\n    # Check if all params are loaded correctly\n    for scoped_name, blob in orig_all_params.items():\n        np.testing.assert_array_equal(blob, restored_all_params[scoped_name])\n    # Check if broadcast_parameters works\n    for scoped_name, blob in 
restored_all_params.items():\n        unscoped_name = c2_utils.UnscopeName(scoped_name)\n        np.testing.assert_array_equal(blob, orig_gpu_0_params[unscoped_name])\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])\n    logger = setup_logging(__name__)\n    logger.setLevel(logging.DEBUG)\n    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)\n    np.random.seed(cfg.RNG_SEED)\n    output_dir = tempfile.mkdtemp()\n    # Generate config for test\n    cfg.MODEL.TYPE = 'generalized_rcnn'\n    cfg.MODEL.CONV_BODY = 'FPN.add_fpn_ResNet50_conv5_body'\n    cfg.MODEL.NUM_CLASSES = 81\n    cfg.MODEL.FASTER_RCNN = True\n    cfg.FPN.FPN_ON = True\n    cfg.FPN.MULTILEVEL_ROIS = True\n    cfg.FPN.MULTILEVEL_RPN = True\n    cfg.FAST_RCNN.ROI_BOX_HEAD = 'fast_rcnn_heads.add_roi_2mlp_head'\n    cfg.FAST_RCNN.ROI_XFORM_METHOD = 'RoIAlign'\n    cfg.OUTPUT_DIR = output_dir\n    cfg.TRAIN.DATASETS = ('coco_2014_minival',)\n    cfg.TRAIN.WEIGHTS = b''\n    for num_gpu in range(workspace.NumCudaDevices()):\n        cfg.immutable(False)\n        cfg.NUM_GPUS = num_gpu + 1\n        assert_and_infer_cfg()\n        test_restore_checkpoint()\n"
  },
  {
    "path": "detectron/tests/test_smooth_l1_loss_op.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport unittest\n\nfrom caffe2.proto import caffe2_pb2\nfrom caffe2.python import core\nfrom caffe2.python import gradient_checker\nfrom caffe2.python import workspace\n\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.logging as logging_utils\n\n\nclass SmoothL1LossTest(unittest.TestCase):\n    def test_forward_and_gradient(self):\n        Y = np.random.randn(128, 4 * 21).astype(np.float32)\n        Y_hat = np.random.randn(128, 4 * 21).astype(np.float32)\n        inside_weights = np.random.randn(128, 4 * 21).astype(np.float32)\n        inside_weights[inside_weights < 0] = 0\n        outside_weights = np.random.randn(128, 4 * 21).astype(np.float32)\n        outside_weights[outside_weights < 0] = 0\n        scale = np.random.random()\n        beta = np.random.random()\n\n        op = core.CreateOperator(\n            'SmoothL1Loss', ['Y_hat', 'Y', 'inside_weights', 'outside_weights'],\n            ['loss'],\n            scale=scale,\n            beta=beta\n        )\n\n        gc = gradient_checker.GradientChecker(\n            stepsize=0.005,\n            threshold=0.005,\n            
device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)\n        )\n\n        res, grad, grad_estimated = gc.CheckSimple(\n            op, [Y_hat, Y, inside_weights, outside_weights], 0, [0]\n        )\n\n        self.assertTrue(\n            grad.shape == grad_estimated.shape,\n            'Fail check: grad.shape != grad_estimated.shape'\n        )\n\n        # To inspect the gradient and estimated gradient:\n        # np.set_printoptions(precision=3, suppress=True)\n        # print('grad:')\n        # print(grad)\n        # print('grad_estimated:')\n        # print(grad_estimated)\n\n        self.assertTrue(res)\n\n\nif __name__ == '__main__':\n    c2_utils.import_detectron_ops()\n    assert 'SmoothL1Loss' in workspace.RegisteredOperators()\n    logging_utils.setup_logging(__name__)\n    unittest.main()\n"
  },
  {
    "path": "detectron/tests/test_spatial_narrow_as_op.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport unittest\n\nfrom caffe2.proto import caffe2_pb2\nfrom caffe2.python import core\nfrom caffe2.python import gradient_checker\nfrom caffe2.python import workspace\n\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.logging as logging_utils\n\n\nclass SpatialNarrowAsOpTest(unittest.TestCase):\n    def _run_test(self, A, B, check_grad=False):\n        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):\n            op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C'])\n            workspace.FeedBlob('A', A)\n            workspace.FeedBlob('B', B)\n        workspace.RunOperatorOnce(op)\n        C = workspace.FetchBlob('C')\n\n        if check_grad:\n            gc = gradient_checker.GradientChecker(\n                stepsize=0.005,\n                threshold=0.005,\n                device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)\n            )\n\n            res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0])\n            self.assertTrue(res, 'Grad check failed')\n\n        dims = C.shape\n        C_ref = A[:dims[0], :dims[1], :dims[2], :dims[3]]\n      
  np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08)\n\n    def test_small_forward_and_gradient(self):\n        A = np.random.randn(2, 3, 5, 7).astype(np.float32)\n        B = np.random.randn(2, 3, 2, 2).astype(np.float32)\n        self._run_test(A, B, check_grad=True)\n\n        A = np.random.randn(2, 3, 5, 7).astype(np.float32)\n        B = np.random.randn(2, 3, 5).astype(np.float32)\n        self._run_test(A, B, check_grad=True)\n\n    def test_large_forward(self):\n        A = np.random.randn(2, 256, 42, 100).astype(np.float32)\n        B = np.random.randn(2, 256, 35, 87).astype(np.float32)\n        self._run_test(A, B)\n\n        A = np.random.randn(2, 256, 42, 87).astype(np.float32)\n        B = np.random.randn(2, 256, 35, 87).astype(np.float32)\n        self._run_test(A, B)\n\n    def test_size_exceptions(self):\n        A = np.random.randn(2, 256, 42, 86).astype(np.float32)\n        B = np.random.randn(2, 256, 35, 87).astype(np.float32)\n        with self.assertRaises(RuntimeError):\n            self._run_test(A, B)\n\n        A = np.random.randn(2, 255, 42, 88).astype(np.float32)\n        B = np.random.randn(2, 256, 35, 87).astype(np.float32)\n        with self.assertRaises(RuntimeError):\n            self._run_test(A, B)\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])\n    c2_utils.import_detectron_ops()\n    assert 'SpatialNarrowAs' in workspace.RegisteredOperators()\n    logging_utils.setup_logging(__name__)\n    unittest.main()\n"
  },
  {
    "path": "detectron/tests/test_zero_even_op.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport unittest\n\nfrom caffe2.proto import caffe2_pb2\nfrom caffe2.python import core\nfrom caffe2.python import workspace\n\nimport detectron.utils.c2 as c2_utils\n\n\nclass ZeroEvenOpTest(unittest.TestCase):\n\n    def _run_zero_even_op(self, X):\n        op = core.CreateOperator('ZeroEven', ['X'], ['Y'])\n        workspace.FeedBlob('X', X)\n        workspace.RunOperatorOnce(op)\n        Y = workspace.FetchBlob('Y')\n        return Y\n\n    def _run_zero_even_op_gpu(self, X):\n        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):\n            op = core.CreateOperator('ZeroEven', ['X'], ['Y'])\n            workspace.FeedBlob('X', X)\n        workspace.RunOperatorOnce(op)\n        Y = workspace.FetchBlob('Y')\n        return Y\n\n    def test_throws_on_non_1D_arrays(self):\n        X = np.zeros((2, 2), dtype=np.float32)\n        with self.assertRaisesRegex(RuntimeError, 'X\\.ndim\\(\\) == 1'):\n            self._run_zero_even_op(X)\n\n    def test_handles_empty_arrays(self):\n        X = np.array([], dtype=np.float32)\n        Y_exp = np.copy(X)\n        Y_act = 
self._run_zero_even_op(X)\n        np.testing.assert_allclose(Y_act, Y_exp)\n\n    def test_sets_vals_at_even_inds_to_zero(self):\n        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)\n        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)\n        Y_act = self._run_zero_even_op(X)\n        np.testing.assert_allclose(Y_act[0::2], Y_exp[0::2])\n\n    def test_preserves_vals_at_odd_inds(self):\n        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)\n        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)\n        Y_act = self._run_zero_even_op(X)\n        np.testing.assert_allclose(Y_act[1::2], Y_exp[1::2])\n\n    def test_handles_even_length_arrays(self):\n        X = np.random.rand(64).astype(np.float32)\n        Y_exp = np.copy(X)\n        Y_exp[0::2] = 0.0\n        Y_act = self._run_zero_even_op(X)\n        np.testing.assert_allclose(Y_act, Y_exp)\n\n    def test_handles_odd_length_arrays(self):\n        X = np.random.randn(77).astype(np.float32)\n        Y_exp = np.copy(X)\n        Y_exp[0::2] = 0.0\n        Y_act = self._run_zero_even_op(X)\n        np.testing.assert_allclose(Y_act, Y_exp)\n\n    def test_gpu_throws_on_non_1D_arrays(self):\n        X = np.zeros((2, 2), dtype=np.float32)\n        with self.assertRaisesRegex(RuntimeError, 'X\\.ndim\\(\\) == 1'):\n            self._run_zero_even_op_gpu(X)\n\n    def test_gpu_handles_empty_arrays(self):\n        X = np.array([], dtype=np.float32)\n        Y_exp = np.copy(X)\n        Y_act = self._run_zero_even_op_gpu(X)\n        np.testing.assert_allclose(Y_act, Y_exp)\n\n    def test_gpu_sets_vals_at_even_inds_to_zero(self):\n        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)\n        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)\n        Y_act = self._run_zero_even_op_gpu(X)\n        np.testing.assert_allclose(Y_act[0::2], Y_exp[0::2])\n\n    def test_gpu_preserves_vals_at_odd_inds(self):\n        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)\n        Y_exp = np.array([0, 1, 0, 3, 0], 
dtype=np.float32)\n        Y_act = self._run_zero_even_op_gpu(X)\n        np.testing.assert_allclose(Y_act[1::2], Y_exp[1::2])\n\n    def test_gpu_handles_even_length_arrays(self):\n        X = np.random.rand(64).astype(np.float32)\n        Y_exp = np.copy(X)\n        Y_exp[0::2] = 0.0\n        Y_act = self._run_zero_even_op_gpu(X)\n        np.testing.assert_allclose(Y_act, Y_exp)\n\n    def test_gpu_handles_odd_length_arrays(self):\n        X = np.random.randn(77).astype(np.float32)\n        Y_exp = np.copy(X)\n        Y_exp[0::2] = 0.0\n        Y_act = self._run_zero_even_op_gpu(X)\n        np.testing.assert_allclose(Y_act, Y_exp)\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])\n    c2_utils.import_custom_ops()\n    assert 'ZeroEven' in workspace.RegisteredOperators()\n    unittest.main()\n"
  },
  {
    "path": "detectron/utils/__init__.py",
    "content": ""
  },
  {
    "path": "detectron/utils/blob.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Caffe2 blob helper functions.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport cv2\nimport numpy as np\nfrom six.moves import cPickle as pickle\n\nfrom caffe2.proto import caffe2_pb2\n\nfrom detectron.core.config import cfg\n\n\ndef get_image_blob(im, target_scale, target_max_size):\n    \"\"\"Convert an image into a network input.\n\n    Arguments:\n        im (ndarray): a color image in BGR order\n\n    Returns:\n        blob (ndarray): a data blob holding an image pyramid\n        im_scale (float): image scale (target size) / (original size)\n        im_info (ndarray)\n    \"\"\"\n    processed_im, im_scale = prep_im_for_blob(\n        im, cfg.PIXEL_MEANS, target_scale, target_max_size\n    )\n    blob = im_list_to_blob(processed_im)\n    # NOTE: this height and width may be larger than actual scaled input image\n    # due to the FPN.COARSEST_STRIDE related padding in 
im_list_to_blob. We are\n    # maintaining this behavior for now to make existing results exactly\n    # reproducible (in practice using the true input image height and width\n    # yields nearly the same results, but they are sometimes slightly different\n    # because predictions near the edge of the image will be pruned more\n    # aggressively).\n    height, width = blob.shape[2], blob.shape[3]\n    im_info = np.hstack((height, width, im_scale))[np.newaxis, :]\n    return blob, im_scale, im_info.astype(np.float32)\n\n\ndef im_list_to_blob(ims):\n    \"\"\"Convert a list of images into a network input. Assumes images were\n    prepared using prep_im_for_blob or equivalent: i.e.\n      - BGR channel order\n      - pixel means subtracted\n      - resized to the desired input size\n      - float32 numpy ndarray format\n    Output is a 4D HCHW tensor of the images concatenated along axis 0 with\n    shape.\n    \"\"\"\n    if not isinstance(ims, list):\n        ims = [ims]\n    max_shape = np.array([im.shape for im in ims]).max(axis=0)\n    # Pad the image so they can be divisible by a stride\n    if cfg.FPN.FPN_ON:\n        stride = float(cfg.FPN.COARSEST_STRIDE)\n        max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)\n        max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)\n\n    num_images = len(ims)\n    blob = np.zeros(\n        (num_images, max_shape[0], max_shape[1], 3), dtype=np.float32\n    )\n    for i in range(num_images):\n        im = ims[i]\n        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im\n    # Move channels (axis 3) to axis 1\n    # Axis order will become: (batch elem, channel, height, width)\n    channel_swap = (0, 3, 1, 2)\n    blob = blob.transpose(channel_swap)\n    return blob\n\n\ndef prep_im_for_blob(im, pixel_means, target_size, max_size):\n    \"\"\"Prepare an image for use as a network input blob. 
Specially:\n      - Subtract per-channel pixel mean\n      - Convert to float32\n      - Rescale to each of the specified target size (capped at max_size)\n    Returns a list of transformed images, one for each target size. Also returns\n    the scale factors that were used to compute each returned image.\n    \"\"\"\n    im = im.astype(np.float32, copy=False)\n    im -= pixel_means\n    im_shape = im.shape\n    im_size_min = np.min(im_shape[0:2])\n    im_size_max = np.max(im_shape[0:2])\n    im_scale = float(target_size) / float(im_size_min)\n    # Prevent the biggest axis from being more than max_size\n    if np.round(im_scale * im_size_max) > max_size:\n        im_scale = float(max_size) / float(im_size_max)\n    im = cv2.resize(\n        im,\n        None,\n        None,\n        fx=im_scale,\n        fy=im_scale,\n        interpolation=cv2.INTER_LINEAR\n    )\n    return im, im_scale\n\n\ndef zeros(shape, int32=False):\n    \"\"\"Return a blob of all zeros of the given shape with the correct float or\n    int data type.\n    \"\"\"\n    return np.zeros(shape, dtype=np.int32 if int32 else np.float32)\n\n\ndef ones(shape, int32=False):\n    \"\"\"Return a blob of all ones of the given shape with the correct float or\n    int data type.\n    \"\"\"\n    return np.ones(shape, dtype=np.int32 if int32 else np.float32)\n\n\ndef py_op_copy_blob(blob_in, blob_out):\n    \"\"\"Copy a numpy ndarray given as blob_in into the Caffe2 CPUTensor blob\n    given as blob_out. Supports float32 and int32 blob data types. 
This function\n    is intended for copying numpy data into a Caffe2 blob in PythonOps.\n    \"\"\"\n    # Some awkward voodoo required by Caffe2 to support int32 blobs\n    needs_int32_init = False\n    try:\n        _ = blob.data.dtype  # noqa\n    except Exception:\n        needs_int32_init = blob_in.dtype == np.int32\n    if needs_int32_init:\n        # init can only take a list (failed on tuple)\n        blob_out.init(list(blob_in.shape), caffe2_pb2.TensorProto.INT32)\n    else:\n        blob_out.reshape(blob_in.shape)\n    blob_out.data[...] = blob_in\n\n\ndef get_loss_gradients(model, loss_blobs):\n    \"\"\"Generate a gradient of 1 for each loss specified in 'loss_blobs'\"\"\"\n    loss_gradients = {}\n    for b in loss_blobs:\n        loss_grad = model.net.ConstantFill(b, [b + '_grad'], value=1.0)\n        loss_gradients[str(b)] = str(loss_grad)\n    return loss_gradients\n\n\ndef serialize(obj):\n    \"\"\"Serialize a Python object using pickle and encode it as an array of\n    float32 values so that it can be fed into the workspace. See deserialize().\n    \"\"\"\n    return np.fromstring(pickle.dumps(obj), dtype=np.uint8).astype(np.float32)\n\n\ndef deserialize(arr):\n    \"\"\"Unserialize a Python object from an array of float32 values fetched from\n    a workspace. See serialize().\n    \"\"\"\n    return pickle.loads(arr.astype(np.uint8).tobytes())\n"
  },
  {
    "path": "detectron/utils/boxes.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Box manipulation functions. The internal Detectron box format is\n[x1, y1, x2, y2] where (x1, y1) specify the top-left box corner and (x2, y2)\nspecify the bottom-right box corner. Boxes from external sources, e.g.,\ndatasets, may be in other formats (such as [x, y, w, h]) and require conversion.\n\nThis module uses a convention that may seem strange at first: the width of a box\nis computed as x2 - x1 + 1 (likewise for height). The \"+ 1\" dates back to old\nobject detection days when the coordinates were integer pixel indices, rather\nthan floating point coordinates in a subpixel coordinate frame. A box with x2 =\nx1 and y2 = y1 was taken to include a single pixel, having a width of 1, and\nhence requiring the \"+ 1\". 
Now, most datasets will likely provide boxes with\nfloating point coordinates and the width should be more reasonably computed as\nx2 - x1.\n\nIn practice, as long as a model is trained and tested with a consistent\nconvention either decision seems to be ok (at least in our experience on COCO).\nSince we have a long history of training models with the \"+ 1\" convention, we\nare reluctant to change it even if our modern tastes prefer not to use it.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nfrom detectron.core.config import cfg\nimport detectron.utils.cython_bbox as cython_bbox\nimport detectron.utils.cython_nms as cython_nms\n\nbbox_overlaps = cython_bbox.bbox_overlaps\n\n\ndef boxes_area(boxes):\n    \"\"\"Compute the area of an array of boxes.\"\"\"\n    w = (boxes[:, 2] - boxes[:, 0] + 1)\n    h = (boxes[:, 3] - boxes[:, 1] + 1)\n    areas = w * h\n    assert np.all(areas >= 0), 'Negative areas founds'\n    return areas\n\n\ndef unique_boxes(boxes, scale=1.0):\n    \"\"\"Return indices of unique boxes.\"\"\"\n    v = np.array([1, 1e3, 1e6, 1e9])\n    hashes = np.round(boxes * scale).dot(v)\n    _, index = np.unique(hashes, return_index=True)\n    return np.sort(index)\n\n\ndef xywh_to_xyxy(xywh):\n    \"\"\"Convert [x1 y1 w h] box format to [x1 y1 x2 y2] format.\"\"\"\n    if isinstance(xywh, (list, tuple)):\n        # Single box given as a list of coordinates\n        assert len(xywh) == 4\n        x1, y1 = xywh[0], xywh[1]\n        x2 = x1 + np.maximum(0., xywh[2] - 1.)\n        y2 = y1 + np.maximum(0., xywh[3] - 1.)\n        return (x1, y1, x2, y2)\n    elif isinstance(xywh, np.ndarray):\n        # Multiple boxes given as a 2D ndarray\n        return np.hstack(\n            (xywh[:, 0:2], xywh[:, 0:2] + np.maximum(0, xywh[:, 2:4] - 1))\n        )\n    else:\n        raise TypeError('Argument xywh must be a list, 
tuple, or numpy array.')\n\n\ndef xyxy_to_xywh(xyxy):\n    \"\"\"Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format.\"\"\"\n    if isinstance(xyxy, (list, tuple)):\n        # Single box given as a list of coordinates\n        assert len(xyxy) == 4\n        x1, y1 = xyxy[0], xyxy[1]\n        w = xyxy[2] - x1 + 1\n        h = xyxy[3] - y1 + 1\n        return (x1, y1, w, h)\n    elif isinstance(xyxy, np.ndarray):\n        # Multiple boxes given as a 2D ndarray\n        return np.hstack((xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1))\n    else:\n        raise TypeError('Argument xyxy must be a list, tuple, or numpy array.')\n\n\ndef filter_small_boxes(boxes, min_size):\n    \"\"\"Keep boxes with width and height both greater than min_size.\"\"\"\n    w = boxes[:, 2] - boxes[:, 0] + 1\n    h = boxes[:, 3] - boxes[:, 1] + 1\n    keep = np.where((w > min_size) & (h > min_size))[0]\n    return keep\n\n\ndef clip_boxes_to_image(boxes, height, width):\n    \"\"\"Clip an array of boxes to an image with the given height and width.\"\"\"\n    boxes[:, [0, 2]] = np.minimum(width - 1., np.maximum(0., boxes[:, [0, 2]]))\n    boxes[:, [1, 3]] = np.minimum(height - 1., np.maximum(0., boxes[:, [1, 3]]))\n    return boxes\n\n\ndef clip_xyxy_to_image(x1, y1, x2, y2, height, width):\n    \"\"\"Clip coordinates to an image with the given height and width.\"\"\"\n    x1 = np.minimum(width - 1., np.maximum(0., x1))\n    y1 = np.minimum(height - 1., np.maximum(0., y1))\n    x2 = np.minimum(width - 1., np.maximum(0., x2))\n    y2 = np.minimum(height - 1., np.maximum(0., y2))\n    return x1, y1, x2, y2\n\n\ndef clip_tiled_boxes(boxes, im_shape):\n    \"\"\"Clip boxes to image boundaries. 
im_shape is [height, width] and boxes\n    has shape (N, 4 * num_tiled_boxes).\"\"\"\n    assert boxes.shape[1] % 4 == 0, \\\n        'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(\n        boxes.shape[1]\n    )\n    # x1 >= 0\n    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)\n    # y1 >= 0\n    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)\n    # x2 < im_shape[1]\n    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)\n    # y2 < im_shape[0]\n    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)\n    return boxes\n\n\ndef bbox_transform(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):\n    \"\"\"Forward transform that maps proposal boxes to predicted ground-truth\n    boxes using bounding-box regression deltas. See bbox_transform_inv for a\n    description of the weights argument.\n    \"\"\"\n    if boxes.shape[0] == 0:\n        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)\n\n    boxes = boxes.astype(deltas.dtype, copy=False)\n\n    widths = boxes[:, 2] - boxes[:, 0] + 1.0\n    heights = boxes[:, 3] - boxes[:, 1] + 1.0\n    ctr_x = boxes[:, 0] + 0.5 * widths\n    ctr_y = boxes[:, 1] + 0.5 * heights\n\n    wx, wy, ww, wh = weights\n    dx = deltas[:, 0::4] / wx\n    dy = deltas[:, 1::4] / wy\n    dw = deltas[:, 2::4] / ww\n    dh = deltas[:, 3::4] / wh\n\n    # Prevent sending too large values into np.exp()\n    dw = np.minimum(dw, cfg.BBOX_XFORM_CLIP)\n    dh = np.minimum(dh, cfg.BBOX_XFORM_CLIP)\n\n    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]\n    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]\n    pred_w = np.exp(dw) * widths[:, np.newaxis]\n    pred_h = np.exp(dh) * heights[:, np.newaxis]\n\n    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)\n    # x1\n    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w\n    # y1\n    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h\n    # x2 
(note: \"- 1\" is correct; don't be fooled by the asymmetry)\n    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1\n    # y2 (note: \"- 1\" is correct; don't be fooled by the asymmetry)\n    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1\n\n    return pred_boxes\n\n\ndef bbox_transform_inv(boxes, gt_boxes, weights=(1.0, 1.0, 1.0, 1.0)):\n    \"\"\"Inverse transform that computes target bounding-box regression deltas\n    given proposal boxes and ground-truth boxes. The weights argument should be\n    a 4-tuple of multiplicative weights that are applied to the regression\n    target.\n\n    In older versions of this code (and in py-faster-rcnn), the weights were set\n    such that the regression deltas would have unit standard deviation on the\n    training dataset. Presently, rather than computing these statistics exactly,\n    we use a fixed set of weights (10., 10., 5., 5.) by default. These are\n    approximately the weights one would get from COCO using the previous unit\n    stdev heuristic.\n    \"\"\"\n    ex_widths = boxes[:, 2] - boxes[:, 0] + 1.0\n    ex_heights = boxes[:, 3] - boxes[:, 1] + 1.0\n    ex_ctr_x = boxes[:, 0] + 0.5 * ex_widths\n    ex_ctr_y = boxes[:, 1] + 0.5 * ex_heights\n\n    gt_widths = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0\n    gt_heights = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0\n    gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_widths\n    gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_heights\n\n    wx, wy, ww, wh = weights\n    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths\n    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights\n    targets_dw = ww * np.log(gt_widths / ex_widths)\n    targets_dh = wh * np.log(gt_heights / ex_heights)\n\n    targets = np.vstack((targets_dx, targets_dy, targets_dw,\n                         targets_dh)).transpose()\n    return targets\n\n\ndef expand_boxes(boxes, scale):\n    \"\"\"Expand an array of boxes by a given scale.\"\"\"\n    w_half = (boxes[:, 2] - boxes[:, 0]) * .5\n    h_half = (boxes[:, 3] - 
boxes[:, 1]) * .5\n    x_c = (boxes[:, 2] + boxes[:, 0]) * .5\n    y_c = (boxes[:, 3] + boxes[:, 1]) * .5\n\n    w_half *= scale\n    h_half *= scale\n\n    boxes_exp = np.zeros(boxes.shape)\n    boxes_exp[:, 0] = x_c - w_half\n    boxes_exp[:, 2] = x_c + w_half\n    boxes_exp[:, 1] = y_c - h_half\n    boxes_exp[:, 3] = y_c + h_half\n\n    return boxes_exp\n\n\ndef flip_boxes(boxes, im_width):\n    \"\"\"Flip boxes horizontally.\"\"\"\n    boxes_flipped = boxes.copy()\n    boxes_flipped[:, 0::4] = im_width - boxes[:, 2::4] - 1\n    boxes_flipped[:, 2::4] = im_width - boxes[:, 0::4] - 1\n    return boxes_flipped\n\n\ndef aspect_ratio(boxes, aspect_ratio):\n    \"\"\"Perform width-relative aspect ratio transformation.\"\"\"\n    boxes_ar = boxes.copy()\n    boxes_ar[:, 0::4] = aspect_ratio * boxes[:, 0::4]\n    boxes_ar[:, 2::4] = aspect_ratio * boxes[:, 2::4]\n    return boxes_ar\n\n\ndef box_voting(top_dets, all_dets, thresh, scoring_method='ID', beta=1.0):\n    \"\"\"Apply bounding-box voting to refine `top_dets` by voting with `all_dets`.\n    See: https://arxiv.org/abs/1505.01749. 
Optional score averaging (not in the\n    referenced paper) can be applied by setting `scoring_method` appropriately.\n    \"\"\"\n    # top_dets is [N, 5] each row is [x1 y1 x2 y2, score]\n    # all_dets is [N, 5] each row is [x1 y1 x2 y2, score]\n    top_dets_out = top_dets.copy()\n    top_boxes = top_dets[:, :4]\n    all_boxes = all_dets[:, :4]\n    all_scores = all_dets[:, 4]\n    top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes)\n    for k in range(top_dets_out.shape[0]):\n        inds_to_vote = np.where(top_to_all_overlaps[k] >= thresh)[0]\n        boxes_to_vote = all_boxes[inds_to_vote, :]\n        ws = all_scores[inds_to_vote]\n        top_dets_out[k, :4] = np.average(boxes_to_vote, axis=0, weights=ws)\n        if scoring_method == 'ID':\n            # Identity, nothing to do\n            pass\n        elif scoring_method == 'TEMP_AVG':\n            # Average probabilities (considered as P(detected class) vs.\n            # P(not the detected class)) after smoothing with a temperature\n            # hyperparameter.\n            P = np.vstack((ws, 1.0 - ws))\n            P_max = np.max(P, axis=0)\n            X = np.log(P / P_max)\n            X_exp = np.exp(X / beta)\n            P_temp = X_exp / np.sum(X_exp, axis=0)\n            P_avg = P_temp[0].mean()\n            top_dets_out[k, 4] = P_avg\n        elif scoring_method == 'AVG':\n            # Combine new probs from overlapping boxes\n            top_dets_out[k, 4] = ws.mean()\n        elif scoring_method == 'IOU_AVG':\n            P = ws\n            ws = top_to_all_overlaps[k, inds_to_vote]\n            P_avg = np.average(P, weights=ws)\n            top_dets_out[k, 4] = P_avg\n        elif scoring_method == 'GENERALIZED_AVG':\n            P_avg = np.mean(ws**beta)**(1.0 / beta)\n            top_dets_out[k, 4] = P_avg\n        elif scoring_method == 'QUASI_SUM':\n            top_dets_out[k, 4] = ws.sum() / float(len(ws))**beta\n        else:\n            raise NotImplementedError(\n              
  'Unknown scoring method {}'.format(scoring_method)\n            )\n\n    return top_dets_out\n\n\ndef nms(dets, thresh):\n    \"\"\"Apply classic DPM-style greedy NMS.\"\"\"\n    if dets.shape[0] == 0:\n        return []\n    return cython_nms.nms(dets, thresh)\n\n\ndef soft_nms(\n    dets, sigma=0.5, overlap_thresh=0.3, score_thresh=0.001, method='linear'\n):\n    \"\"\"Apply the soft NMS algorithm from https://arxiv.org/abs/1704.04503.\"\"\"\n    if dets.shape[0] == 0:\n        return dets, []\n\n    methods = {'hard': 0, 'linear': 1, 'gaussian': 2}\n    assert method in methods, 'Unknown soft_nms method: {}'.format(method)\n\n    dets, keep = cython_nms.soft_nms(\n        np.ascontiguousarray(dets, dtype=np.float32),\n        np.float32(sigma),\n        np.float32(overlap_thresh),\n        np.float32(score_thresh),\n        np.uint8(methods[method])\n    )\n    return dets, keep\n"
  },
  {
    "path": "detectron/utils/c2.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Helpful utilities for working with Caffe2.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom six import string_types\nimport contextlib\nimport subprocess\n\nfrom caffe2.proto import caffe2_pb2\nfrom caffe2.python import core\nfrom caffe2.python import dyndep\nfrom caffe2.python import scope\nfrom caffe2.python import workspace\n\nimport detectron.utils.env as envu\n\n\ndef import_contrib_ops():\n    \"\"\"Import contrib ops needed by Detectron.\"\"\"\n    envu.import_nccl_ops()\n\n\ndef import_detectron_ops():\n    \"\"\"Import Detectron ops.\"\"\"\n    detectron_ops_lib = envu.get_detectron_ops_lib()\n    dyndep.InitOpsLibrary(detectron_ops_lib)\n\n\ndef import_custom_ops():\n    \"\"\"Import custom ops.\"\"\"\n    custom_ops_lib = envu.get_custom_ops_lib()\n    dyndep.InitOpsLibrary(custom_ops_lib)\n\n\ndef SuffixNet(name, net, prefix_len, outputs):\n    \"\"\"Returns a new Net from the given Net (`net`) that includes only the ops\n    after removing the first `prefix_len` number of ops. The new Net is thus a\n    suffix of `net`. 
Blobs listed in `outputs` are registered as external output\n    blobs.\n    \"\"\"\n    outputs = BlobReferenceList(outputs)\n    for output in outputs:\n        assert net.BlobIsDefined(output)\n    new_net = net.Clone(name)\n\n    del new_net.Proto().op[:]\n    del new_net.Proto().external_input[:]\n    del new_net.Proto().external_output[:]\n\n    # Add suffix ops\n    new_net.Proto().op.extend(net.Proto().op[prefix_len:])\n    # Add external input blobs\n    # Treat any undefined blobs as external inputs\n    input_names = [\n        i for op in new_net.Proto().op for i in op.input\n        if not new_net.BlobIsDefined(i)]\n    new_net.Proto().external_input.extend(input_names)\n    # Add external output blobs\n    output_names = [str(o) for o in outputs]\n    new_net.Proto().external_output.extend(output_names)\n    return new_net, [new_net.GetBlobRef(o) for o in output_names]\n\n\ndef BlobReferenceList(blob_ref_or_list):\n    \"\"\"Ensure that the argument is returned as a list of BlobReferences.\"\"\"\n    if isinstance(blob_ref_or_list, core.BlobReference):\n        return [blob_ref_or_list]\n    elif type(blob_ref_or_list) in (list, tuple):\n        for b in blob_ref_or_list:\n            assert isinstance(b, core.BlobReference)\n        return blob_ref_or_list\n    else:\n        raise TypeError(\n            'blob_ref_or_list must be a BlobReference or a list/tuple of '\n            'BlobReferences'\n        )\n\n\ndef UnscopeName(possibly_scoped_name):\n    \"\"\"Remove any name scoping from a (possibly) scoped name. For example,\n    convert the name 'gpu_0/foo' to 'foo'.\"\"\"\n    assert isinstance(possibly_scoped_name, string_types)\n    return possibly_scoped_name[\n        possibly_scoped_name.rfind(scope._NAMESCOPE_SEPARATOR) + 1:]\n\n\n@contextlib.contextmanager\ndef NamedCudaScope(gpu_id):\n    \"\"\"Creates a GPU name scope and CUDA device scope. 
This function is provided\n    to reduce `with ...` nesting levels.\"\"\"\n    with GpuNameScope(gpu_id):\n        with CudaScope(gpu_id):\n            yield\n\n\n@contextlib.contextmanager\ndef GpuNameScope(gpu_id):\n    \"\"\"Create a name scope for GPU device `gpu_id`.\"\"\"\n    with core.NameScope('gpu_{:d}'.format(gpu_id)):\n        yield\n\n\n@contextlib.contextmanager\ndef CudaScope(gpu_id):\n    \"\"\"Create a CUDA device scope for GPU device `gpu_id`.\"\"\"\n    gpu_dev = CudaDevice(gpu_id)\n    with core.DeviceScope(gpu_dev):\n        yield\n\n\n@contextlib.contextmanager\ndef CpuScope():\n    \"\"\"Create a CPU device scope.\"\"\"\n    cpu_dev = core.DeviceOption(caffe2_pb2.CPU)\n    with core.DeviceScope(cpu_dev):\n        yield\n\n\ndef CudaDevice(gpu_id):\n    \"\"\"Create a CUDA device.\"\"\"\n    return core.DeviceOption(caffe2_pb2.CUDA, gpu_id)\n\n\ndef gauss_fill(std):\n    \"\"\"Gaussian fill helper to reduce verbosity.\"\"\"\n    return ('GaussianFill', {'std': std})\n\n\ndef const_fill(value):\n    \"\"\"Constant fill helper to reduce verbosity.\"\"\"\n    return ('ConstantFill', {'value': value})\n\n\ndef get_nvidia_info():\n    return (\n        get_nvidia_smi_output(),\n        workspace.GetCUDAVersion(),\n        workspace.GetCuDNNVersion(),\n    )\n\n\ndef get_nvidia_smi_output():\n    try:\n        info = subprocess.check_output([\"nvidia-smi\"], stderr=subprocess.STDOUT)\n        info = info.decode(\"utf8\")\n    except Exception as e:\n        info = \"Executing nvidia-smi failed: \" + str(e)\n    return info.strip()\n"
  },
  {
    "path": "detectron/utils/collections.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"A simple attribute dictionary used for representing configuration options.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n\nclass AttrDict(dict):\n\n    IMMUTABLE = '__immutable__'\n\n    def __init__(self, *args, **kwargs):\n        super(AttrDict, self).__init__(*args, **kwargs)\n        self.__dict__[AttrDict.IMMUTABLE] = False\n\n    def __getattr__(self, name):\n        if name in self.__dict__:\n            return self.__dict__[name]\n        elif name in self:\n            return self[name]\n        else:\n            raise AttributeError(name)\n\n    def __setattr__(self, name, value):\n        if not self.__dict__[AttrDict.IMMUTABLE]:\n            if name in self.__dict__:\n                self.__dict__[name] = value\n            else:\n                self[name] = value\n        else:\n            raise AttributeError(\n                'Attempted to set \"{}\" to \"{}\", but AttrDict is immutable'.\n                format(name, value)\n            )\n\n    def immutable(self, is_immutable):\n        \"\"\"Set immutability to is_immutable and recursively apply the setting\n        to all nested AttrDicts.\n        \"\"\"\n        
self.__dict__[AttrDict.IMMUTABLE] = is_immutable\n        # Recursively set immutable state\n        for v in self.__dict__.values():\n            if isinstance(v, AttrDict):\n                v.immutable(is_immutable)\n        for v in self.values():\n            if isinstance(v, AttrDict):\n                v.immutable(is_immutable)\n\n    def is_immutable(self):\n        return self.__dict__[AttrDict.IMMUTABLE]\n"
  },
  {
    "path": "detectron/utils/colormap.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"An awesome colormap for really neat visualizations.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\n\ndef colormap(rgb=False):\n    color_list = np.array(\n        [\n            0.000, 0.447, 0.741,\n            0.850, 0.325, 0.098,\n            0.929, 0.694, 0.125,\n            0.494, 0.184, 0.556,\n            0.466, 0.674, 0.188,\n            0.301, 0.745, 0.933,\n            0.635, 0.078, 0.184,\n            0.300, 0.300, 0.300,\n            0.600, 0.600, 0.600,\n            1.000, 0.000, 0.000,\n            1.000, 0.500, 0.000,\n            0.749, 0.749, 0.000,\n            0.000, 1.000, 0.000,\n            0.000, 0.000, 1.000,\n            0.667, 0.000, 1.000,\n            0.333, 0.333, 0.000,\n            0.333, 0.667, 0.000,\n            0.333, 1.000, 0.000,\n            0.667, 0.333, 0.000,\n            0.667, 0.667, 0.000,\n            0.667, 1.000, 0.000,\n            1.000, 0.333, 0.000,\n            1.000, 0.667, 0.000,\n            1.000, 1.000, 0.000,\n            0.000, 0.333, 0.500,\n            0.000, 0.667, 0.500,\n            0.000, 1.000, 0.500,\n            0.333, 0.000, 0.500,\n            0.333, 0.333, 
0.500,\n            0.333, 0.667, 0.500,\n            0.333, 1.000, 0.500,\n            0.667, 0.000, 0.500,\n            0.667, 0.333, 0.500,\n            0.667, 0.667, 0.500,\n            0.667, 1.000, 0.500,\n            1.000, 0.000, 0.500,\n            1.000, 0.333, 0.500,\n            1.000, 0.667, 0.500,\n            1.000, 1.000, 0.500,\n            0.000, 0.333, 1.000,\n            0.000, 0.667, 1.000,\n            0.000, 1.000, 1.000,\n            0.333, 0.000, 1.000,\n            0.333, 0.333, 1.000,\n            0.333, 0.667, 1.000,\n            0.333, 1.000, 1.000,\n            0.667, 0.000, 1.000,\n            0.667, 0.333, 1.000,\n            0.667, 0.667, 1.000,\n            0.667, 1.000, 1.000,\n            1.000, 0.000, 1.000,\n            1.000, 0.333, 1.000,\n            1.000, 0.667, 1.000,\n            0.167, 0.000, 0.000,\n            0.333, 0.000, 0.000,\n            0.500, 0.000, 0.000,\n            0.667, 0.000, 0.000,\n            0.833, 0.000, 0.000,\n            1.000, 0.000, 0.000,\n            0.000, 0.167, 0.000,\n            0.000, 0.333, 0.000,\n            0.000, 0.500, 0.000,\n            0.000, 0.667, 0.000,\n            0.000, 0.833, 0.000,\n            0.000, 1.000, 0.000,\n            0.000, 0.000, 0.167,\n            0.000, 0.000, 0.333,\n            0.000, 0.000, 0.500,\n            0.000, 0.000, 0.667,\n            0.000, 0.000, 0.833,\n            0.000, 0.000, 1.000,\n            0.000, 0.000, 0.000,\n            0.143, 0.143, 0.143,\n            0.286, 0.286, 0.286,\n            0.429, 0.429, 0.429,\n            0.571, 0.571, 0.571,\n            0.714, 0.714, 0.714,\n            0.857, 0.857, 0.857,\n            1.000, 1.000, 1.000\n        ]\n    ).astype(np.float32)\n    color_list = color_list.reshape((-1, 3)) * 255\n    if not rgb:\n        color_list = color_list[:, ::-1]\n    return color_list\n"
  },
  {
    "path": "detectron/utils/coordinator.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Coordinated access to a shared multithreading/processing queue.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport contextlib\nimport logging\nimport threading\nimport traceback\nfrom six.moves import queue as Queue\n\nlog = logging.getLogger(__name__)\n\n\nclass Coordinator:\n\n    def __init__(self):\n        self._event = threading.Event()\n\n    def request_stop(self):\n        log.debug('Coordinator stopping')\n        self._event.set()\n\n    def should_stop(self):\n        return self._event.is_set()\n\n    def wait_for_stop(self):\n        return self._event.wait()\n\n    @contextlib.contextmanager\n    def stop_on_exception(self):\n        try:\n            yield\n        except Exception:\n            if not self.should_stop():\n                traceback.print_exc()\n                self.request_stop()\n\n\ndef coordinated_get(coordinator, queue):\n    while not coordinator.should_stop():\n        try:\n            return queue.get(block=True, timeout=1.0)\n        except Queue.Empty:\n            continue\n    raise Exception('Coordinator stopped during get()')\n\n\ndef coordinated_put(coordinator, queue, element):\n    while not 
coordinator.should_stop():\n        try:\n            queue.put(element, block=True, timeout=1.0)\n            return\n        except Queue.Full:\n            continue\n    raise Exception('Coordinator stopped during put()')\n"
  },
  {
    "path": "detectron/utils/cython_bbox.pyx",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Sergey Karayev\n# --------------------------------------------------------\n\ncimport cython\nimport numpy as np\ncimport numpy as np\n\nDTYPE = np.float32\nctypedef np.float32_t DTYPE_t\n\n@cython.boundscheck(False)\ndef bbox_overlaps(\n        np.ndarray[DTYPE_t, ndim=2] boxes,\n        np.ndarray[DTYPE_t, ndim=2] query_boxes):\n    \"\"\"\n    Parameters\n    ----------\n    boxes: (N, 4) ndarray of float\n    query_boxes: (K, 4) ndarray of float\n    Returns\n    -------\n    overlaps: (N, K) ndarray of overlap between boxes and query_boxes\n    \"\"\"\n    cdef unsigned int N = boxes.shape[0]\n    cdef unsigned int K = query_boxes.shape[0]\n    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)\n    cdef DTYPE_t iw, ih, box_area\n    cdef DTYPE_t ua\n    cdef unsigned int k, n\n    with nogil:\n        for k in range(K):\n            box_area = (\n                (query_boxes[k, 2] - query_boxes[k, 0] + 1) *\n                (query_boxes[k, 3] - query_boxes[k, 1] + 1)\n            )\n            for n in range(N):\n                iw = (\n                  
  min(boxes[n, 2], query_boxes[k, 2]) -\n                    max(boxes[n, 0], query_boxes[k, 0]) + 1\n                )\n                if iw > 0:\n                    ih = (\n                        min(boxes[n, 3], query_boxes[k, 3]) -\n                        max(boxes[n, 1], query_boxes[k, 1]) + 1\n                    )\n                    if ih > 0:\n                        ua = float(\n                            (boxes[n, 2] - boxes[n, 0] + 1) *\n                            (boxes[n, 3] - boxes[n, 1] + 1) +\n                            box_area - iw * ih\n                        )\n                        overlaps[n, k] = iw * ih / ua\n    return overlaps\n"
  },
  {
    "path": "detectron/utils/cython_nms.pyx",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\ncimport cython\nimport numpy as np\ncimport numpy as np\n\ncdef inline np.float32_t max(np.float32_t a, np.float32_t b) nogil:\n    return a if a >= b else b\n\ncdef inline np.float32_t min(np.float32_t a, np.float32_t b) nogil:\n    return a if a <= b else b\n\n@cython.boundscheck(False)\n@cython.cdivision(True)\n@cython.wraparound(False)\ndef nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh):\n    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]\n    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]\n    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]\n    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]\n    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]\n\n    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)\n    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]\n\n    cdef int ndets = dets.shape[0]\n    cdef np.ndarray[np.int_t, ndim=1] suppressed = \\\n            np.zeros((ndets), dtype=np.int)\n\n    # nominal 
indices\n    cdef int _i, _j\n    # sorted indices\n    cdef int i, j\n    # temp variables for box i's (the box currently under consideration)\n    cdef np.float32_t ix1, iy1, ix2, iy2, iarea\n    # variables for computing overlap with box j (lower scoring box)\n    cdef np.float32_t xx1, yy1, xx2, yy2\n    cdef np.float32_t w, h\n    cdef np.float32_t inter, ovr\n\n    with nogil:\n      for _i in range(ndets):\n          i = order[_i]\n          if suppressed[i] == 1:\n              continue\n          ix1 = x1[i]\n          iy1 = y1[i]\n          ix2 = x2[i]\n          iy2 = y2[i]\n          iarea = areas[i]\n          for _j in range(_i + 1, ndets):\n              j = order[_j]\n              if suppressed[j] == 1:\n                  continue\n              xx1 = max(ix1, x1[j])\n              yy1 = max(iy1, y1[j])\n              xx2 = min(ix2, x2[j])\n              yy2 = min(iy2, y2[j])\n              w = max(0.0, xx2 - xx1 + 1)\n              h = max(0.0, yy2 - yy1 + 1)\n              inter = w * h\n              ovr = inter / (iarea + areas[j] - inter)\n              if ovr >= thresh:\n                  suppressed[j] = 1\n\n    return np.where(suppressed == 0)[0]\n\n# ----------------------------------------------------------\n# Soft-NMS: Improving Object Detection With One Line of Code\n# Copyright (c) University of Maryland, College Park\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Navaneeth Bodla and Bharat Singh\n# ----------------------------------------------------------\n@cython.boundscheck(False)\n@cython.cdivision(True)\n@cython.wraparound(False)\ndef soft_nms(\n    np.ndarray[float, ndim=2] boxes_in,\n    float sigma=0.5,\n    float Nt=0.3,\n    float threshold=0.001,\n    unsigned int method=0\n):\n    boxes = boxes_in.copy()\n    cdef unsigned int N = boxes.shape[0]\n    cdef float iw, ih, box_area\n    cdef float ua\n    cdef int pos = 0\n    cdef float maxscore = 0\n    cdef int maxpos = 0\n    cdef float x1, x2, 
y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov\n    inds = np.arange(N)\n\n    for i in range(N):\n        maxscore = boxes[i, 4]\n        maxpos = i\n\n        tx1 = boxes[i,0]\n        ty1 = boxes[i,1]\n        tx2 = boxes[i,2]\n        ty2 = boxes[i,3]\n        ts = boxes[i,4]\n        ti = inds[i]\n\n        pos = i + 1\n        # get max box\n        while pos < N:\n            if maxscore < boxes[pos, 4]:\n                maxscore = boxes[pos, 4]\n                maxpos = pos\n            pos = pos + 1\n\n        # add max box as a detection\n        boxes[i,0] = boxes[maxpos,0]\n        boxes[i,1] = boxes[maxpos,1]\n        boxes[i,2] = boxes[maxpos,2]\n        boxes[i,3] = boxes[maxpos,3]\n        boxes[i,4] = boxes[maxpos,4]\n        inds[i] = inds[maxpos]\n\n        # swap ith box with position of max box\n        boxes[maxpos,0] = tx1\n        boxes[maxpos,1] = ty1\n        boxes[maxpos,2] = tx2\n        boxes[maxpos,3] = ty2\n        boxes[maxpos,4] = ts\n        inds[maxpos] = ti\n\n        tx1 = boxes[i,0]\n        ty1 = boxes[i,1]\n        tx2 = boxes[i,2]\n        ty2 = boxes[i,3]\n        ts = boxes[i,4]\n\n        pos = i + 1\n        # NMS iterations, note that N changes if detection boxes fall below\n        # threshold\n        while pos < N:\n            x1 = boxes[pos, 0]\n            y1 = boxes[pos, 1]\n            x2 = boxes[pos, 2]\n            y2 = boxes[pos, 3]\n            s = boxes[pos, 4]\n\n            area = (x2 - x1 + 1) * (y2 - y1 + 1)\n            iw = (min(tx2, x2) - max(tx1, x1) + 1)\n            if iw > 0:\n                ih = (min(ty2, y2) - max(ty1, y1) + 1)\n                if ih > 0:\n                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)\n                    ov = iw * ih / ua #iou between max box and detection box\n\n                    if method == 1: # linear\n                        if ov > Nt:\n                            weight = 1 - ov\n                        else:\n                   
         weight = 1\n                    elif method == 2: # gaussian\n                        weight = np.exp(-(ov * ov)/sigma)\n                    else: # original NMS\n                        if ov > Nt:\n                            weight = 0\n                        else:\n                            weight = 1\n\n                    boxes[pos, 4] = weight*boxes[pos, 4]\n\n                    # if box score falls below threshold, discard the box by\n                    # swapping with last box update N\n                    if boxes[pos, 4] < threshold:\n                        boxes[pos,0] = boxes[N-1, 0]\n                        boxes[pos,1] = boxes[N-1, 1]\n                        boxes[pos,2] = boxes[N-1, 2]\n                        boxes[pos,3] = boxes[N-1, 3]\n                        boxes[pos,4] = boxes[N-1, 4]\n                        inds[pos] = inds[N-1]\n                        N = N - 1\n                        pos = pos - 1\n\n            pos = pos + 1\n\n    return boxes[:N], inds[:N]\n"
  },
  {
    "path": "detectron/utils/env.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Environment helper functions.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport os\nimport sys\nimport yaml\n\n# Default value of the CMake install prefix\n_CMAKE_INSTALL_PREFIX = '/usr/local'\n# Detectron ops lib\n_DETECTRON_OPS_LIB = 'libcaffe2_detectron_ops_gpu.so'\n\n\ndef get_runtime_dir():\n    \"\"\"Retrieve the path to the runtime directory.\"\"\"\n    return sys.path[0]\n\n\ndef get_py_bin_ext():\n    \"\"\"Retrieve python binary extension.\"\"\"\n    return '.py'\n\n\ndef set_up_matplotlib():\n    \"\"\"Set matplotlib up.\"\"\"\n    import matplotlib\n    # Use a non-interactive backend\n    matplotlib.use('Agg')\n\n\ndef exit_on_error():\n    \"\"\"Exit from a detectron tool when there's an error.\"\"\"\n    sys.exit(1)\n\n\ndef import_nccl_ops():\n    \"\"\"Import NCCL ops.\"\"\"\n    # There is no need to load NCCL ops since the\n    # NCCL dependency is built into the Caffe2 gpu lib\n    pass\n\n\ndef get_detectron_ops_lib():\n    \"\"\"Retrieve Detectron ops library.\"\"\"\n    # Candidate prefixes for detectron ops lib path\n    prefixes = [_CMAKE_INSTALL_PREFIX, sys.prefix, sys.exec_prefix] + sys.path\n    # Candidate subdirs for 
detectron ops lib\n    subdirs = ['lib', 'torch/lib']\n    # Try to find detectron ops lib\n    for prefix in prefixes:\n        for subdir in subdirs:\n            ops_path = os.path.join(prefix, subdir, _DETECTRON_OPS_LIB)\n            if os.path.exists(ops_path):\n                print('Found Detectron ops lib: {}'.format(ops_path))\n                return ops_path\n    raise Exception('Detectron ops lib not found')\n\n\ndef get_custom_ops_lib():\n    \"\"\"Retrieve custom ops library.\"\"\"\n    det_dir, _ = os.path.split(os.path.dirname(__file__))\n    root_dir, _ = os.path.split(det_dir)\n    custom_ops_lib = os.path.join(\n        root_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so')\n    assert os.path.exists(custom_ops_lib), \\\n        'Custom ops lib not found at \\'{}\\''.format(custom_ops_lib)\n    return custom_ops_lib\n\n\n# YAML load/dump function aliases\nyaml_load = yaml.load\nyaml_dump = yaml.dump\n"
  },
  {
    "path": "detectron/utils/image.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Image helper functions.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport cv2\nimport numpy as np\n\n\ndef aspect_ratio_rel(im, aspect_ratio):\n    \"\"\"Performs width-relative aspect ratio transformation.\"\"\"\n    im_h, im_w = im.shape[:2]\n    im_ar_w = int(round(aspect_ratio * im_w))\n    im_ar = cv2.resize(im, dsize=(im_ar_w, im_h))\n    return im_ar\n\n\ndef aspect_ratio_abs(im, aspect_ratio):\n    \"\"\"Performs absolute aspect ratio transformation.\"\"\"\n    im_h, im_w = im.shape[:2]\n    im_area = im_h * im_w\n\n    im_ar_w = np.sqrt(im_area * aspect_ratio)\n    im_ar_h = np.sqrt(im_area / aspect_ratio)\n    assert np.isclose(im_ar_w / im_ar_h, aspect_ratio)\n\n    im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h)))\n    return im_ar\n"
  },
  {
    "path": "detectron/utils/io.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"IO utilities.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport errno\nimport hashlib\nimport logging\nimport os\nimport re\nimport six\nimport sys\nfrom six.moves import cPickle as pickle\nfrom six.moves import urllib\nfrom uuid import uuid4\n\nlogger = logging.getLogger(__name__)\n\n_DETECTRON_S3_BASE_URL = 'https://dl.fbaipublicfiles.com/detectron'\n\n\ndef save_object(obj, file_name, pickle_format=2):\n    \"\"\"Save a Python object by pickling it.\n\nUnless specifically overridden, we want to save it in Pickle format=2 since this\nwill allow other Python2 executables to load the resulting Pickle. When we want\nto completely remove Python2 backward-compatibility, we can bump it up to 3. 
We\nshould never use pickle.HIGHEST_PROTOCOL as far as possible if the resulting\nfile is manifested or used, external to the system.\n    \"\"\"\n    file_name = os.path.abspath(file_name)\n    # Avoid filesystem race conditions (particularly on network filesystems)\n    # by saving to a random tmp file on the same filesystem, and then\n    # atomically rename to the target filename.\n    tmp_file_name = file_name + \".tmp.\" + uuid4().hex\n    try:\n        with open(tmp_file_name, 'wb') as f:\n            pickle.dump(obj, f, pickle_format)\n            f.flush()  # make sure it's written to disk\n            os.fsync(f.fileno())\n        os.rename(tmp_file_name, file_name)\n    finally:\n        # Clean up the temp file on failure. Rather than using os.path.exists(),\n        # which can be unreliable on network filesystems, attempt to delete and\n        # ignore os errors.\n        try:\n            os.remove(tmp_file_name)\n        except EnvironmentError as e:  # parent class of IOError, OSError\n            if getattr(e, 'errno', None) != errno.ENOENT:  # We expect ENOENT\n                logger.info(\"Could not delete temp file %r\",\n                    tmp_file_name, exc_info=True)\n                # pass through since we don't want the job to crash\n\n\ndef load_object(file_name):\n    with open(file_name, 'rb') as f:\n        # The default encoding used while unpickling is 7-bit (ASCII.) However,\n        # the blobs are arbitrary 8-bit bytes which don't agree. The absolute\n        # correct way to do this is to use `encoding=\"bytes\"` and then interpret\n        # the blob names either as ASCII, or better, as unicode utf-8. 
A\n        # reasonable fix, however, is to treat it the encoding as 8-bit latin1\n        # (which agrees with the first 256 characters of Unicode anyway.)\n        if six.PY2:\n            return pickle.load(f)\n        else:\n            return pickle.load(f, encoding='latin1')\n\n\ndef cache_url(url_or_file, cache_dir):\n    \"\"\"Download the file specified by the URL to the cache_dir and return the\n    path to the cached file. If the argument is not a URL, simply return it as\n    is.\n    \"\"\"\n    is_url = re.match(\n        r'^(?:http)s?://', url_or_file, re.IGNORECASE\n    ) is not None\n\n    if not is_url:\n        return url_or_file\n\n    url = url_or_file\n    assert url.startswith(_DETECTRON_S3_BASE_URL), \\\n        ('Detectron only automatically caches URLs in the Detectron S3 '\n         'bucket: {}').format(_DETECTRON_S3_BASE_URL)\n\n    cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir)\n    if os.path.exists(cache_file_path):\n        assert_cache_file_is_ok(url, cache_file_path)\n        return cache_file_path\n\n    cache_file_dir = os.path.dirname(cache_file_path)\n    if not os.path.exists(cache_file_dir):\n        os.makedirs(cache_file_dir)\n\n    logger.info('Downloading remote file {} to {}'.format(url, cache_file_path))\n    download_url(url, cache_file_path)\n    assert_cache_file_is_ok(url, cache_file_path)\n    return cache_file_path\n\n\ndef assert_cache_file_is_ok(url, file_path):\n    \"\"\"Check that cache file has the correct hash.\"\"\"\n    # File is already in the cache, verify that the md5sum matches and\n    # return local path\n    cache_file_md5sum = _get_file_md5sum(file_path)\n    ref_md5sum = _get_reference_md5sum(url)\n    assert cache_file_md5sum == ref_md5sum, \\\n        ('Target URL {} appears to be downloaded to the local cache file '\n         '{}, but the md5 hash of the local file does not match the '\n         'reference (actual: {} vs. expected: {}). 
You may wish to delete '\n         'the cached file and try again to trigger automatic '\n         'download.').format(url, file_path, cache_file_md5sum, ref_md5sum)\n\n\ndef _progress_bar(count, total):\n    \"\"\"Report download progress.\n    Credit:\n    https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113\n    \"\"\"\n    bar_len = 60\n    filled_len = int(round(bar_len * count / float(total)))\n\n    percents = round(100.0 * count / float(total), 1)\n    bar = '=' * filled_len + '-' * (bar_len - filled_len)\n\n    sys.stdout.write(\n        '  [{}] {}% of {:.1f}MB file  \\r'.\n        format(bar, percents, total / 1024 / 1024)\n    )\n    sys.stdout.flush()\n    if count >= total:\n        sys.stdout.write('\\n')\n\n\ndef download_url(\n    url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar\n):\n    \"\"\"Download url and write it to dst_file_path.\n    Credit:\n    https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook\n    \"\"\"\n    response = urllib.request.urlopen(url)\n    if six.PY2:\n        total_size = response.info().getheader('Content-Length').strip()\n    else:\n        total_size = response.info().get('Content-Length').strip()\n    total_size = int(total_size)\n    bytes_so_far = 0\n\n    with open(dst_file_path, 'wb') as f:\n        while 1:\n            chunk = response.read(chunk_size)\n            bytes_so_far += len(chunk)\n            if not chunk:\n                break\n            if progress_hook:\n                progress_hook(bytes_so_far, total_size)\n            f.write(chunk)\n\n    return bytes_so_far\n\n\ndef _get_file_md5sum(file_name):\n    \"\"\"Compute the md5 hash of a file.\"\"\"\n    hash_obj = hashlib.md5()\n    with open(file_name, 'rb') as f:\n        hash_obj.update(f.read())\n    return hash_obj.hexdigest().encode('utf-8')\n\n\ndef _get_reference_md5sum(url):\n    \"\"\"By convention the md5 hash for url is stored in url + '.md5sum'.\"\"\"\n    
url_md5sum = url + '.md5sum'\n    md5sum = urllib.request.urlopen(url_md5sum).read().strip()\n    return md5sum\n"
  },
  {
    "path": "detectron/utils/keypoints.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Keypoint utilities (somewhat specific to COCO keypoints).\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport cv2\nimport numpy as np\n\nfrom detectron.core.config import cfg\nimport detectron.utils.blob as blob_utils\n\n\ndef get_keypoints():\n    \"\"\"Get the COCO keypoints and their left/right flip coorespondence map.\"\"\"\n    # Keypoints are not available in the COCO json for the test split, so we\n    # provide them here.\n    keypoints = [\n        'nose',\n        'left_eye',\n        'right_eye',\n        'left_ear',\n        'right_ear',\n        'left_shoulder',\n        'right_shoulder',\n        'left_elbow',\n        'right_elbow',\n        'left_wrist',\n        'right_wrist',\n        'left_hip',\n        'right_hip',\n        'left_knee',\n        'right_knee',\n        'left_ankle',\n        'right_ankle'\n    ]\n    keypoint_flip_map = {\n        'left_eye': 'right_eye',\n        'left_ear': 'right_ear',\n        'left_shoulder': 'right_shoulder',\n        'left_elbow': 'right_elbow',\n        'left_wrist': 'right_wrist',\n        'left_hip': 'right_hip',\n        'left_knee': 'right_knee',\n        'left_ankle': 'right_ankle'\n  
  }\n    return keypoints, keypoint_flip_map\n\n\ndef get_person_class_index():\n    \"\"\"Index of the person class in COCO.\"\"\"\n    return 1\n\n\ndef flip_keypoints(keypoints, keypoint_flip_map, keypoint_coords, width):\n    \"\"\"Left/right flip keypoint_coords. keypoints and keypoint_flip_map are\n    accessible from get_keypoints().\n    \"\"\"\n    flipped_kps = keypoint_coords.copy()\n    for lkp, rkp in keypoint_flip_map.items():\n        lid = keypoints.index(lkp)\n        rid = keypoints.index(rkp)\n        flipped_kps[:, :, lid] = keypoint_coords[:, :, rid]\n        flipped_kps[:, :, rid] = keypoint_coords[:, :, lid]\n\n    # Flip x coordinates\n    flipped_kps[:, 0, :] = width - flipped_kps[:, 0, :] - 1\n    # Maintain COCO convention that if visibility == 0, then x, y = 0\n    inds = np.where(flipped_kps[:, 2, :] == 0)\n    flipped_kps[inds[0], 0, inds[1]] = 0\n    return flipped_kps\n\n\ndef flip_heatmaps(heatmaps):\n    \"\"\"Flip heatmaps horizontally.\"\"\"\n    keypoints, flip_map = get_keypoints()\n    heatmaps_flipped = heatmaps.copy()\n    for lkp, rkp in flip_map.items():\n        lid = keypoints.index(lkp)\n        rid = keypoints.index(rkp)\n        heatmaps_flipped[:, rid, :, :] = heatmaps[:, lid, :, :]\n        heatmaps_flipped[:, lid, :, :] = heatmaps[:, rid, :, :]\n    heatmaps_flipped = heatmaps_flipped[:, :, :, ::-1]\n    return heatmaps_flipped\n\n\ndef heatmaps_to_keypoints(maps, rois):\n    \"\"\"Extract predicted keypoint locations from heatmaps. Output has shape\n    (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob)\n    for each keypoint.\n    \"\"\"\n    # This function converts a discrete image coordinate in a HEATMAP_SIZE x\n    # HEATMAP_SIZE image to a continuous keypoint coordinate. 
We maintain\n    # consistency with keypoints_to_heatmap_labels by using the conversion from\n    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a\n    # continuous coordinate.\n    offset_x = rois[:, 0]\n    offset_y = rois[:, 1]\n\n    widths = rois[:, 2] - rois[:, 0]\n    heights = rois[:, 3] - rois[:, 1]\n    widths = np.maximum(widths, 1)\n    heights = np.maximum(heights, 1)\n    widths_ceil = np.ceil(widths)\n    heights_ceil = np.ceil(heights)\n\n    # NCHW to NHWC for use with OpenCV\n    maps = np.transpose(maps, [0, 2, 3, 1])\n    min_size = cfg.KRCNN.INFERENCE_MIN_SIZE\n    xy_preds = np.zeros(\n        (len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS), dtype=np.float32)\n    for i in range(len(rois)):\n        if min_size > 0:\n            roi_map_width = int(np.maximum(widths_ceil[i], min_size))\n            roi_map_height = int(np.maximum(heights_ceil[i], min_size))\n        else:\n            roi_map_width = widths_ceil[i]\n            roi_map_height = heights_ceil[i]\n        width_correction = widths[i] / roi_map_width\n        height_correction = heights[i] / roi_map_height\n        roi_map = cv2.resize(\n            maps[i], (roi_map_width, roi_map_height),\n            interpolation=cv2.INTER_CUBIC)\n        # Bring back to CHW\n        roi_map = np.transpose(roi_map, [2, 0, 1])\n        roi_map_probs = scores_to_probs(roi_map.copy())\n        w = roi_map.shape[2]\n        for k in range(cfg.KRCNN.NUM_KEYPOINTS):\n            pos = roi_map[k, :, :].argmax()\n            x_int = pos % w\n            y_int = (pos - x_int) // w\n            assert (roi_map_probs[k, y_int, x_int] ==\n                    roi_map_probs[k, :, :].max())\n            x = (x_int + 0.5) * width_correction\n            y = (y_int + 0.5) * height_correction\n            xy_preds[i, 0, k] = x + offset_x[i]\n            xy_preds[i, 1, k] = y + offset_y[i]\n            xy_preds[i, 2, k] = roi_map[k, y_int, x_int]\n            xy_preds[i, 3, k] = roi_map_probs[k, 
y_int, x_int]\n\n    return xy_preds\n\n\ndef keypoints_to_heatmap_labels(keypoints, rois):\n    \"\"\"Encode keypoint location in the target heatmap for use in\n    SoftmaxWithLoss.\n    \"\"\"\n    # Maps keypoints from the half-open interval [x1, x2) on continuous image\n    # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete image\n    # coordinates. We use the continuous <-> discrete conversion from Heckbert\n    # 1990 (\"What is the coordinate of a pixel?\"): d = floor(c) and c = d + 0.5,\n    # where d is a discrete coordinate and c is a continuous coordinate.\n    assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS\n\n    shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS)\n    heatmaps = blob_utils.zeros(shape)\n    weights = blob_utils.zeros(shape)\n\n    offset_x = rois[:, 0]\n    offset_y = rois[:, 1]\n    scale_x = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 2] - rois[:, 0])\n    scale_y = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 3] - rois[:, 1])\n\n    for kp in range(keypoints.shape[2]):\n        vis = keypoints[:, 2, kp] > 0\n        x = keypoints[:, 0, kp].astype(np.float32)\n        y = keypoints[:, 1, kp].astype(np.float32)\n        # Since we use floor below, if a keypoint is exactly on the roi's right\n        # or bottom boundary, we shift it in by eps (conceptually) to keep it in\n        # the ground truth heatmap.\n        x_boundary_inds = np.where(x == rois[:, 2])[0]\n        y_boundary_inds = np.where(y == rois[:, 3])[0]\n        x = (x - offset_x) * scale_x\n        x = np.floor(x)\n        if len(x_boundary_inds) > 0:\n            x[x_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1\n\n        y = (y - offset_y) * scale_y\n        y = np.floor(y)\n        if len(y_boundary_inds) > 0:\n            y[y_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1\n\n        valid_loc = np.logical_and(\n            np.logical_and(x >= 0, y >= 0),\n            np.logical_and(\n                x < cfg.KRCNN.HEATMAP_SIZE, y < cfg.KRCNN.HEATMAP_SIZE))\n\n        valid 
= np.logical_and(valid_loc, vis)\n        valid = valid.astype(np.int32)\n\n        lin_ind = y * cfg.KRCNN.HEATMAP_SIZE + x\n        heatmaps[:, kp] = lin_ind * valid\n        weights[:, kp] = valid\n\n    return heatmaps, weights\n\n\ndef scores_to_probs(scores):\n    \"\"\"Transforms CxHxW of scores to probabilities spatially.\"\"\"\n    channels = scores.shape[0]\n    for c in range(channels):\n        temp = scores[c, :, :]\n        max_score = temp.max()\n        temp = np.exp(temp - max_score) / np.sum(np.exp(temp - max_score))\n        scores[c, :, :] = temp\n    return scores\n\n\ndef nms_oks(kp_predictions, rois, thresh):\n    \"\"\"Nms based on kp predictions.\"\"\"\n    scores = np.mean(kp_predictions[:, 2, :], axis=1)\n    order = scores.argsort()[::-1]\n\n    keep = []\n    while order.size > 0:\n        i = order[0]\n        keep.append(i)\n        ovr = compute_oks(\n            kp_predictions[i], rois[i], kp_predictions[order[1:]],\n            rois[order[1:]])\n        inds = np.where(ovr <= thresh)[0]\n        order = order[inds + 1]\n\n    return keep\n\n\ndef compute_oks(src_keypoints, src_roi, dst_keypoints, dst_roi):\n    \"\"\"Compute OKS for predicted keypoints wrt gt_keypoints.\n    src_keypoints: 4xK\n    src_roi: 4x1\n    dst_keypoints: Nx4xK\n    dst_roi: Nx4\n    \"\"\"\n\n    sigmas = np.array([\n        .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87,\n        .87, .89, .89]) / 10.0\n    vars = (sigmas * 2)**2\n\n    # area\n    src_area = (src_roi[2] - src_roi[0] + 1) * (src_roi[3] - src_roi[1] + 1)\n\n    # measure the per-keypoint distance if keypoints visible\n    dx = dst_keypoints[:, 0, :] - src_keypoints[0, :]\n    dy = dst_keypoints[:, 1, :] - src_keypoints[1, :]\n\n    e = (dx**2 + dy**2) / vars / (src_area + np.spacing(1)) / 2\n    e = np.sum(np.exp(-e), axis=1) / e.shape[1]\n\n    return e\n"
  },
  {
    "path": "detectron/utils/logging.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Utilities for logging.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom collections import deque\nfrom email.mime.text import MIMEText\nimport json\nimport logging\nimport numpy as np\nimport smtplib\nimport sys\n\n\ndef log_json_stats(stats, sort_keys=True):\n    # hack to control precision of top-level floats\n    stats = {\n        k: '{:.6f}'.format(v) if isinstance(v, float) else v\n        for k, v in stats.items()\n    }\n    print('json_stats: {:s}'.format(json.dumps(stats, sort_keys=sort_keys)))\n\n\nclass SmoothedValue:\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n\n    def __init__(self, window_size):\n        self.deque = deque(maxlen=window_size)\n        self.series = []\n        self.total = 0.0\n        self.count = 0\n\n    def AddValue(self, value):\n        self.deque.append(value)\n        self.series.append(value)\n        self.count += 1\n        self.total += value\n\n    def GetMedianValue(self):\n        return np.median(self.deque)\n\n    def GetAverageValue(self):\n        return np.mean(self.deque)\n\n    def 
GetGlobalAverageValue(self):\n        return self.total / self.count\n\n\ndef send_email(subject, body, to):\n    s = smtplib.SMTP('localhost')\n    mime = MIMEText(body)\n    mime['Subject'] = subject\n    mime['To'] = to\n    s.sendmail('detectron', to, mime.as_string())\n\n\ndef setup_logging(name):\n    FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s'\n    # Manually clear root loggers to prevent any module that may have called\n    # logging.basicConfig() from blocking our logging setup\n    logging.root.handlers = []\n    logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)\n    logger = logging.getLogger(name)\n    return logger\n"
  },
  {
    "path": "detectron/utils/lr_policy.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Learning rate policies.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nfrom detectron.core.config import cfg\n\n\ndef get_lr_at_iter(it):\n    \"\"\"Get the learning rate at iteration it according to the cfg.SOLVER\n    settings.\n    \"\"\"\n    lr = get_lr_func()(it)\n    if it < cfg.SOLVER.WARM_UP_ITERS:\n        method = cfg.SOLVER.WARM_UP_METHOD\n        if method == 'constant':\n            warmup_factor = cfg.SOLVER.WARM_UP_FACTOR\n        elif method == 'linear':\n            alpha = it / cfg.SOLVER.WARM_UP_ITERS\n            warmup_factor = cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha\n        else:\n            raise KeyError('Unknown SOLVER.WARM_UP_METHOD: {}'.format(method))\n        lr *= warmup_factor\n    return np.float32(lr)\n\n\n# ---------------------------------------------------------------------------- #\n# Learning rate policy functions\n# ---------------------------------------------------------------------------- #\n\ndef lr_func_steps_with_lrs(cur_iter):\n    \"\"\"For cfg.SOLVER.LR_POLICY = 'steps_with_lrs'\n\n    Change the learning rate to specified values at specified iterations.\n\n    Example:\n  
  cfg.SOLVER.MAX_ITER: 90\n    cfg.SOLVER.STEPS:    [0,    60,    80]\n    cfg.SOLVER.LRS:      [0.02, 0.002, 0.0002]\n    for cur_iter in [0, 59]   use 0.02\n                 in [60, 79]  use 0.002\n                 in [80, inf] use 0.0002\n    \"\"\"\n    ind = get_step_index(cur_iter)\n    return cfg.SOLVER.LRS[ind]\n\n\ndef lr_func_steps_with_decay(cur_iter):\n    \"\"\"For cfg.SOLVER.LR_POLICY = 'steps_with_decay'\n\n    Change the learning rate specified iterations based on the formula\n    lr = base_lr * gamma ** lr_step_count.\n\n    Example:\n    cfg.SOLVER.MAX_ITER: 90\n    cfg.SOLVER.STEPS:    [0,    60,    80]\n    cfg.SOLVER.BASE_LR:  0.02\n    cfg.SOLVER.GAMMA:    0.1\n    for cur_iter in [0, 59]   use 0.02 = 0.02 * 0.1 ** 0\n                 in [60, 79]  use 0.002 = 0.02 * 0.1 ** 1\n                 in [80, inf] use 0.0002 = 0.02 * 0.1 ** 2\n    \"\"\"\n    ind = get_step_index(cur_iter)\n    return cfg.SOLVER.BASE_LR * cfg.SOLVER.GAMMA ** ind\n\n\ndef lr_func_step(cur_iter):\n    \"\"\"For cfg.SOLVER.LR_POLICY = 'step'\n    \"\"\"\n    return (\n        cfg.SOLVER.BASE_LR *\n        cfg.SOLVER.GAMMA ** (cur_iter // cfg.SOLVER.STEP_SIZE))\n\n\ndef lr_func_cosine_decay(cur_iter):\n    \"\"\"For cfg.SOLVER.LR_POLICY = 'cosine_decay'\n    \"\"\"\n    iter_frac = float(cur_iter) / cfg.SOLVER.MAX_ITER\n    cos_frac = 0.5 * (np.cos(np.pi * iter_frac) + 1)\n    return cfg.SOLVER.BASE_LR * cos_frac\n\n\ndef lr_func_exp_decay(cur_iter):\n    \"\"\"For cfg.SOLVER.LR_POLICY = 'exp_decay'\n    \"\"\"\n    # GAMMA is final/initial learning rate ratio\n    iter_frac = float(cur_iter) / cfg.SOLVER.MAX_ITER\n    exp_frac = np.exp(iter_frac * np.log(cfg.SOLVER.GAMMA))\n    return cfg.SOLVER.BASE_LR * exp_frac\n\n\n# ---------------------------------------------------------------------------- #\n# Helpers\n# ---------------------------------------------------------------------------- #\n\ndef get_step_index(cur_iter):\n    \"\"\"Given an iteration, find which learning 
rate step we're at.\"\"\"\n    assert cfg.SOLVER.STEPS[0] == 0, 'The first step should always start at 0.'\n    steps = cfg.SOLVER.STEPS + [cfg.SOLVER.MAX_ITER]\n    for ind, step in enumerate(steps):  # NoQA\n        if cur_iter < step:\n            break\n    return ind - 1\n\n\ndef get_lr_func():\n    policy = 'lr_func_' + cfg.SOLVER.LR_POLICY\n    if policy not in globals():\n        raise NotImplementedError(\n            'Unknown LR policy: {}'.format(cfg.SOLVER.LR_POLICY))\n    else:\n        return globals()[policy]\n"
  },
  {
    "path": "detectron/utils/model_convert_utils.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n'''Helper functions for model conversion to pb'''\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom functools import wraps\nimport copy\nimport numpy as np\n\nfrom caffe2.python import core, workspace\nfrom caffe2.proto import caffe2_pb2\n\n\nclass OpFilter:\n    def __init__(self, **kwargs):\n        self.type = None\n        self.type_in = None\n        self.inputs = None\n        self.outputs = None\n        self.input_has = None\n        self.output_has = None\n        self.cond = None\n        self.reverse = False\n\n        assert all([x in self.__dict__ for x in kwargs])\n        self.__dict__.update(kwargs)\n\n    def check(self, op):\n        ret = self.reverse\n        if self.type and op.type != self.type:\n            return ret\n        if self.type_in and op.type not in self.type_in:\n            return ret\n        if self.inputs and set(op.input) != set(self.inputs):\n            return ret\n        if self.outputs and set(op.output) != set(self.outputs):\n            return ret\n        if self.input_has and self.input_has not in op.input:\n            return ret\n        if self.output_has and self.output_has not in op.output:\n            
return ret\n        if self.cond is not None and not self.cond:\n            return ret\n        return not ret\n\n\ndef filter_op(op, **kwargs):\n    ''' Returns true if passed all checks '''\n    return OpFilter(**kwargs).check(op)\n\n\ndef op_filter(**filter_args):\n    ''' Returns None if no condition is satisfied '''\n    def actual_decorator(f):\n        @wraps(f)\n        def wrapper(op, **params):\n            if not filter_op(op, **filter_args):\n                return None\n            return f(op, **params)\n        return wrapper\n    return actual_decorator\n\n\ndef op_func_chain(convert_func_list):\n    ''' Run funcs one by one until func return is not None '''\n    assert isinstance(convert_func_list, list)\n\n    def _chain(op):\n        for x in convert_func_list:\n            ret = x(op)\n            if ret is not None:\n                return ret\n        return None\n\n    return _chain\n\n\ndef convert_op_in_ops(ops_ref, func_or_list):\n    func = func_or_list\n    if isinstance(func_or_list, list):\n        func = op_func_chain(func_or_list)\n    ops = [op for op in ops_ref]\n    converted_ops = []\n    for op in ops:\n        new_ops = func(op)\n        if new_ops is not None and not isinstance(new_ops, list):\n            new_ops = [new_ops]\n        converted_ops.extend(new_ops if new_ops is not None else [op])\n    del ops_ref[:]\n    # ops_ref maybe of type RepeatedCompositeFieldContainer\n    # which does not have append()\n    ops_ref.extend(converted_ops)\n\n\ndef convert_op_in_proto(proto, func_or_list):\n    convert_op_in_ops(proto.op, func_or_list)\n\n\ndef get_op_arg(op, arg_name):\n    for x in op.arg:\n        if x.name == arg_name:\n            return x\n    return None\n\n\ndef get_op_arg_valf(op, arg_name, default_val):\n    arg = get_op_arg(op, arg_name)\n    return arg.f if arg is not None else default_val\n\n\ndef update_mobile_engines(net):\n    for op in net.op:\n        if op.type == \"Conv\":\n            op.engine = 
\"NNPACK\"\n        if op.type == \"ConvTranspose\":\n            op.engine = \"BLOCK\"\n\n\ndef pairwise(iterable):\n    \"s -> (s0,s1), (s1,s2), (s2, s3), ...\"\n    from itertools import tee\n    a, b = tee(iterable)\n    next(b, None)\n    return zip(a, b)\n\n\ndef blob_uses(net, blob):\n    u = []\n    for i, op in enumerate(net.op):\n        if blob in op.input or blob in op.control_input:\n            u.append(i)\n    return u\n\n\ndef fuse_first_affine(net, params, removed_tensors):\n    net = copy.deepcopy(net)\n    params = copy.deepcopy(params)\n\n    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):\n        if next_.input[0] != current.output[0]:\n            continue\n\n        if current.type not in (\"Conv\", \"ConvTranspose\") \\\n           or next_.type != \"AffineChannel\":\n            continue\n        if current.output[0] != next_.output[0] and \\\n                len(blob_uses(net, current.output[0])) != 1:\n            # Can't fuse if more than one user unless AffineChannel is inplace\n            continue\n\n        # else, can fuse\n        conv = current\n        affine = next_\n        fused_conv = copy.deepcopy(conv)\n        fused_conv.output[0] = affine.output[0]\n        conv_weight = params[conv.input[1]]\n        conv_has_bias = len(conv.input) > 2\n        conv_bias = params[conv.input[2]] if conv_has_bias else 0\n\n        A = params[affine.input[1]]\n        B = params[affine.input[2]]\n\n        # Thus, can just have the affine transform\n        # X * A + B\n        # where\n        # A = bn_scale * 1.0 / (sqrt(running_var + eps))\n        # B =  (bias - running_mean * (1.0 / sqrt(running_var + eps))\n        # * bn_scale)\n\n        # This identify should hold if we have correctly fused\n        # np.testing.assert_array_equal(\n        #     params[conv.output[0]] * A + B,\n        #     params[bn.output[0]])\n\n        # Now, we have that the computation made is the following:\n        # ((X `conv` W) + b) * A 
+ B\n        # Then, we can simply fuse this as follows:\n        # (X `conv` (W * A)) + b * A + B\n        # which is simply\n        # (X `conv` Q) + C\n        # where\n\n        # Q = W * A\n        # C = b * A + B\n\n        # For ConvTranspose, from the view of convolutions as a\n        # Toepeliz multiplication, we have W_ = W^T, so the weights\n        # are laid out as (R, S, K, K) (vs (S, R, K, K) for a Conv),\n        # so the weights broadcast slightly differently. Remember, our\n        # BN scale 'B' is of size (S,)\n\n        A_ = A.reshape(-1, 1, 1, 1) if conv.type == \"Conv\" else \\\n            A.reshape(1, -1, 1, 1)\n\n        C = conv_bias * A + B\n        Q = conv_weight * A_\n\n        assert params[conv.input[1]].shape == Q.shape\n\n        params[conv.input[1]] = Q\n        if conv_has_bias:\n            assert params[conv.input[2]].shape == C.shape\n            params[conv.input[2]] = C\n        else:\n            # make af_bias to be bias of the conv layer\n            fused_conv.input.append(affine.input[2])\n            params[affine.input[2]] = B\n\n        new_ops = net.op[:i] + [fused_conv] + net.op[j + 1:]\n        del net.op[:]\n        if conv_has_bias:\n            del params[affine.input[2]]\n            removed_tensors.append(affine.input[2])\n        removed_tensors.append(affine.input[1])\n        del params[affine.input[1]]\n        net.op.extend(new_ops)\n        break\n    return net, params, removed_tensors\n\n\ndef fuse_affine(net, params, ignore_failure):\n    # Run until we hit a fixed point\n    removed_tensors = []\n    while True:\n        (next_net, next_params, removed_tensors) = \\\n            fuse_first_affine(net, params, removed_tensors)\n        if len(next_net.op) == len(net.op):\n            if (\n                any(op.type == \"AffineChannel\" for op in next_net.op) and\n                not ignore_failure\n            ):\n                raise Exception(\n                    \"Model contains 
AffineChannel op after fusion: %s\", next_net)\n            return (next_net, next_params, removed_tensors)\n        net, params, removed_tensors = (next_net, next_params, removed_tensors)\n\n\ndef fuse_net(fuse_func, net, blobs, ignore_failure=False):\n    is_core_net = isinstance(net, core.Net)\n    if is_core_net:\n        net = net.Proto()\n\n    net, params, removed_tensors = fuse_func(net, blobs, ignore_failure)\n    for rt in removed_tensors:\n        net.external_input.remove(rt)\n\n    if is_core_net:\n        net = core.Net(net)\n\n    return net, params\n\n\ndef fuse_net_affine(net, blobs):\n    return fuse_net(fuse_affine, net, blobs)\n\n\ndef add_tensor(net, name, blob):\n    ''' Create an operator to store the tensor 'blob',\n        run the operator to put the blob to workspace.\n        uint8 is stored as an array of string with one element.\n    '''\n    kTypeNameMapper = {\n        np.dtype('float32'): \"GivenTensorFill\",\n        np.dtype('int32'): \"GivenTensorIntFill\",\n        np.dtype('int64'): \"GivenTensorInt64Fill\",\n        np.dtype('uint8'): \"GivenTensorStringFill\",\n    }\n\n    shape = blob.shape\n    values = blob\n    # pass array of uint8 as a string to save storage\n    # storing uint8_t has a large overhead for now\n    if blob.dtype == np.dtype('uint8'):\n        shape = [1]\n        values = [str(blob.data)]\n\n    op = core.CreateOperator(\n        kTypeNameMapper[blob.dtype],\n        [], [name],\n        shape=shape,\n        values=values,\n        # arg=[\n        #     putils.MakeArgument(\"shape\", shape),\n        #     putils.MakeArgument(\"values\", values),\n        # ]\n    )\n    net.op.extend([op])\n\n\ndef gen_init_net_from_blobs(blobs, blobs_to_use=None, excluded_blobs=None):\n    ''' Generate an initialization net based on a blob dict '''\n    ret = caffe2_pb2.NetDef()\n    if blobs_to_use is None:\n        blobs_to_use = {x for x in blobs}\n    else:\n        blobs_to_use = copy.deepcopy(blobs_to_use)\n    
if excluded_blobs is not None:\n        blobs_to_use = [x for x in blobs_to_use if x not in excluded_blobs]\n    for name in blobs_to_use:\n        blob = blobs[name]\n        if isinstance(blob, str):\n            print('Blob {} with type {} is not supported in generating init net,'\n                  ' skipped.'.format(name, type(blob)))\n            continue\n        add_tensor(ret, name, blob)\n\n    return ret\n\n\ndef get_ws_blobs(blob_names=None):\n    ''' Get blobs in 'blob_names' in the default workspace,\n        get all blobs if blob_names is None '''\n    blobs = {}\n    if blob_names is None:\n        blob_names = workspace.Blobs()\n    blobs = {x: workspace.FetchBlob(x) for x in blob_names}\n\n    return blobs\n\n\ndef get_device_option_cpu():\n    device_option = core.DeviceOption(caffe2_pb2.CPU)\n    return device_option\n\n\ndef get_device_option_cuda(gpu_id=0):\n    device_option = caffe2_pb2.DeviceOption()\n    device_option.device_type = caffe2_pb2.CUDA\n    device_option.device_id = gpu_id\n    return device_option\n\n\ndef create_input_blobs_for_net(net_def):\n    for op in net_def.op:\n        for blob_in in op.input:\n            if not workspace.HasBlob(blob_in):\n                workspace.CreateBlob(blob_in)\n\n\ndef compare_model(model1_func, model2_func, test_image, check_blobs):\n    ''' model_func(test_image, check_blobs)\n    '''\n    cb1, cb2 = check_blobs, check_blobs\n    if isinstance(check_blobs, dict):\n        cb1 = check_blobs.keys()\n        cb2 = check_blobs.values()\n    print('Running the first model...')\n    res1 = model1_func(test_image, check_blobs)\n    print('Running the second model...')\n    res2 = model2_func(test_image, check_blobs)\n    for idx in range(len(cb1)):\n        print('Checking {} -> {}...'.format(cb1[idx], cb2[idx]))\n        n1, n2 = cb1[idx], cb2[idx]\n        r1 = res1[n1] if n1 in res1 else None\n        r2 = res2[n2] if n2 in res2 else None\n        assert r1 is not None or r2 is None, \\\n      
      \"Blob {} in model1 is None\".format(n1)\n        assert r2 is not None or r1 is None, \\\n            \"Blob {} in model2 is None\".format(n2)\n        assert r1.shape == r2.shape, \\\n            \"Blob {} and {} shape mismatched: {} vs {}\".format(\n                n1, n2, r1.shape, r2.shape)\n\n        np.testing.assert_array_almost_equal(\n            r1, r2, decimal=3,\n            err_msg='{} and {} not matched. Max diff: {}'.format(\n                n1, n2, np.amax(np.absolute(r1 - r2))))\n\n    return True\n\n\n# graph_name could not contain word 'graph'\ndef save_graph(net, file_name, graph_name=\"net\", op_only=True):\n    from caffe2.python import net_drawer\n    graph = None\n    ops = net.op\n    if not op_only:\n        graph = net_drawer.GetPydotGraph(\n            ops, graph_name,\n            rankdir=\"TB\")\n    else:\n        graph = net_drawer.GetPydotGraphMinimal(\n            ops, graph_name,\n            rankdir=\"TB\", minimal_dependency=True)\n\n    try:\n        graph.write_png(file_name)\n    except Exception as e:\n        print('Error when writing graph to image {}'.format(e))\n"
  },
  {
    "path": "detectron/utils/net.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Helper functions for working with Caffe2 networks (i.e., operator graphs).\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom collections import OrderedDict\nimport logging\nimport numpy as np\nimport os\nimport pprint\n\nfrom caffe2.python import core\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import cfg\nfrom detectron.core.config import load_cfg\nfrom detectron.utils.io import load_object\nfrom detectron.utils.io import save_object\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.env as envu\n\nlogger = logging.getLogger(__name__)\nlogger.setLevel(logging.INFO)\n\n\ndef initialize_from_weights_file(model, weights_file, broadcast=True):\n    \"\"\"Initialize a model from weights stored in a pickled dictionary. 
If\n    multiple GPUs are used, the loaded weights are synchronized on all GPUs,\n    unless 'broadcast' is False.\n    \"\"\"\n    initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)\n    if broadcast:\n        broadcast_parameters(model)\n\n\ndef initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):\n    \"\"\"Initialize a network with ops on a specific GPU.\n\n    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will\n    automatically map logical GPU ids (starting from 0) to the physical GPUs\n    specified in CUDA_VISIBLE_DEVICES.\n    \"\"\"\n    logger.info('Loading weights from: {}'.format(weights_file))\n    ws_blobs = workspace.Blobs()\n    src_blobs = load_object(weights_file)\n\n    if 'cfg' in src_blobs:\n        saved_cfg = load_cfg(src_blobs['cfg'])\n        configure_bbox_reg_weights(model, saved_cfg)\n    if 'blobs' in src_blobs:\n        # Backwards compat--dictionary used to be only blobs, now they are\n        # stored under the 'blobs' key\n        src_blobs = src_blobs['blobs']\n    # Initialize weights on GPU gpu_id only\n    unscoped_param_names = OrderedDict()  # Print these out in model order\n    for blob in model.params:\n        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True\n    with c2_utils.NamedCudaScope(gpu_id):\n        for unscoped_param_name in unscoped_param_names.keys():\n            if (unscoped_param_name.find(']_') >= 0 and\n                    unscoped_param_name not in src_blobs):\n                # Special case for sharing initialization from a pretrained\n                # model:\n                # If a blob named '_[xyz]_foo' is in model.params and not in\n                # the initialization blob dictionary, then load source blob\n                # 'foo' into destination blob '_[xyz]_foo'\n                src_name = unscoped_param_name[\n                    unscoped_param_name.find(']_') + 2:]\n            else:\n                src_name = unscoped_param_name\n       
     if src_name not in src_blobs:\n                logger.info('{:s} not found'.format(src_name))\n                continue\n            dst_name = core.ScopedName(unscoped_param_name)\n            has_momentum = src_name + '_momentum' in src_blobs\n            has_momentum_str = ' [+ momentum]' if has_momentum else ''\n            logger.info(\n                '{:s}{:} loaded from weights file into {:s}: {}'.format(\n                    src_name, has_momentum_str, dst_name, src_blobs[src_name]\n                    .shape\n                )\n            )\n            if dst_name in ws_blobs:\n                # If the blob is already in the workspace, make sure that it\n                # matches the shape of the loaded blob\n                ws_blob = workspace.FetchBlob(dst_name)\n                assert ws_blob.shape == src_blobs[src_name].shape, \\\n                    ('Workspace blob {} with shape {} does not match '\n                     'weights file shape {}').format(\n                        src_name,\n                        ws_blob.shape,\n                        src_blobs[src_name].shape)\n            workspace.FeedBlob(\n                dst_name,\n                src_blobs[src_name].astype(np.float32, copy=False))\n            if has_momentum:\n                workspace.FeedBlob(\n                    dst_name + '_momentum',\n                    src_blobs[src_name + '_momentum'].astype(\n                        np.float32, copy=False))\n\n    # We preserve blobs that are in the weights file but not used by the current\n    # model. We load these into CPU memory under the '__preserve__/' namescope.\n    # These blobs will be stored when saving a model to a weights file. 
This\n    # feature allows for alternating optimization of Faster R-CNN in which blobs\n    # unused by one step can still be preserved forward and used to initialize\n    # another step.\n    for src_name in src_blobs.keys():\n        if (src_name not in unscoped_param_names and\n                not src_name.endswith('_momentum') and\n                src_blobs[src_name] is not None):\n            with c2_utils.CpuScope():\n                workspace.FeedBlob(\n                    '__preserve__/{:s}'.format(src_name), src_blobs[src_name])\n                logger.info(\n                    '{:s} preserved in workspace (unused)'.format(src_name))\n\n\ndef save_model_to_weights_file(weights_file, model):\n    \"\"\"Stash model weights in a dictionary and pickle them to a file. We map\n    GPU device scoped names to unscoped names (e.g., 'gpu_0/conv1_w' ->\n    'conv1_w').\n    \"\"\"\n    logger.info(\n        'Saving parameters and momentum to {}'.format(\n            os.path.abspath(weights_file)))\n    blobs = {}\n    # Save all parameters\n    for param in model.params:\n        scoped_name = str(param)\n        unscoped_name = c2_utils.UnscopeName(scoped_name)\n        if unscoped_name not in blobs:\n            logger.debug(' {:s} -> {:s}'.format(scoped_name, unscoped_name))\n            blobs[unscoped_name] = workspace.FetchBlob(scoped_name)\n    # Save momentum\n    for param in model.TrainableParams():\n        scoped_name = str(param) + '_momentum'\n        unscoped_name = c2_utils.UnscopeName(scoped_name)\n        if unscoped_name not in blobs:\n            logger.debug(' {:s} -> {:s}'.format(scoped_name, unscoped_name))\n            blobs[unscoped_name] = workspace.FetchBlob(scoped_name)\n    # Save preserved blobs\n    for scoped_name in workspace.Blobs():\n        if scoped_name.startswith('__preserve__/'):\n            unscoped_name = c2_utils.UnscopeName(scoped_name)\n            if unscoped_name not in blobs:\n                logger.debug(\n            
        ' {:s} -> {:s} (preserved)'.format(\n                        scoped_name, unscoped_name))\n                blobs[unscoped_name] = workspace.FetchBlob(scoped_name)\n    cfg_yaml = envu.yaml_dump(cfg)\n    save_object(dict(blobs=blobs, cfg=cfg_yaml), weights_file)\n\n\ndef broadcast_parameters(model):\n    \"\"\"Copy parameter blobs from GPU 0 over the corresponding parameter blobs\n    on GPUs 1 through cfg.NUM_GPUS - 1.\n    \"\"\"\n    if cfg.NUM_GPUS == 1:\n        # no-op if only running on a single GPU\n        return\n\n    def _do_broadcast(all_blobs):\n        assert len(all_blobs) % cfg.NUM_GPUS == 0, \\\n            ('Unexpected value for NUM_GPUS. Make sure you are not '\n             'running single-GPU inference with NUM_GPUS > 1.')\n        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)\n        for i in range(blobs_per_gpu):\n            blobs = [p for p in all_blobs[i::blobs_per_gpu]]\n            data = workspace.FetchBlob(blobs[0])\n            logger.debug('Broadcasting {} to'.format(str(blobs[0])))\n            for i, p in enumerate(blobs[1:]):\n                logger.debug(' |-> {}'.format(str(p)))\n                with c2_utils.CudaScope(i + 1):\n                    workspace.FeedBlob(p, data)\n\n    _do_broadcast(model.params)\n    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])\n\n\ndef sum_multi_gpu_blob(blob_name):\n    \"\"\"Return the sum of a scalar blob held on multiple GPUs.\"\"\"\n    val = 0\n    for i in range(cfg.NUM_GPUS):\n        val += float(workspace.FetchBlob('gpu_{}/{}'.format(i, blob_name)))\n    return val\n\n\ndef average_multi_gpu_blob(blob_name):\n    \"\"\"Return the average of a scalar blob held on multiple GPUs.\"\"\"\n    return sum_multi_gpu_blob(blob_name) / cfg.NUM_GPUS\n\n\ndef print_net(model, namescope='gpu_0'):\n    \"\"\"Print the model network.\"\"\"\n    logger.info('Printing model: {}'.format(model.net.Name()))\n    op_list = model.net.Proto().op\n    for op in op_list:\n     
   input_name = op.input\n        # For simplicity: only print the first output\n        # Not recommended if there are split layers\n        output_name = str(op.output[0])\n        op_type = op.type\n        op_name = op.name\n\n        if namescope is None or output_name.startswith(namescope):\n            # Only print the forward pass network\n            if output_name.find('grad') >= 0 or output_name.find('__m') >= 0:\n                continue\n\n            try:\n                # Under some conditions (e.g., dynamic memory optimization)\n                # it is possible that the network frees some blobs when they are\n                # no longer needed. Handle this case...\n                output_shape = workspace.FetchBlob(output_name).shape\n            except BaseException:\n                output_shape = '<unknown>'\n\n            first_blob = True\n            op_label = op_type + (op_name if op_name == '' else ':' + op_name)\n            suffix = ' ------- (op: {})'.format(op_label)\n            for j in range(len(input_name)):\n                if input_name[j] in model.params:\n                    continue\n                input_blob = workspace.FetchBlob(input_name[j])\n                if isinstance(input_blob, np.ndarray):\n                    input_shape = input_blob.shape\n                    logger.info('{:28s}: {:20s} => {:28s}: {:20s}{}'.format(\n                        c2_utils.UnscopeName(str(input_name[j])),\n                        '{}'.format(input_shape),\n                        c2_utils.UnscopeName(str(output_name)),\n                        '{}'.format(output_shape),\n                        suffix))\n                    if first_blob:\n                        first_blob = False\n                        suffix = ' ------|'\n    logger.info('End of model: {}'.format(model.net.Name()))\n\n\ndef configure_bbox_reg_weights(model, saved_cfg):\n    \"\"\"Compatibility for old models trained with bounding box regression\n    mean/std 
normalization (instead of fixed weights).\n    \"\"\"\n    if 'MODEL' not in saved_cfg or 'BBOX_REG_WEIGHTS' not in saved_cfg.MODEL:\n        logger.warning('Model from weights file was trained before config key '\n                       'MODEL.BBOX_REG_WEIGHTS was added. Forcing '\n                       'MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.) to ensure '\n                       'correct **inference** behavior.')\n        # Generally we don't allow modifying the config, but this is a one-off\n        # hack to support some very old models\n        is_immutable = cfg.is_immutable()\n        cfg.immutable(False)\n        cfg.MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.)\n        cfg.immutable(is_immutable)\n        logger.info('New config:')\n        logger.info(pprint.pformat(cfg))\n        assert not model.train, (\n            'This model was trained with an older version of the code that '\n            'used bounding box regression mean/std normalization. It can no '\n            'longer be used for training. To upgrade it to a trainable model '\n            'please use fb/compat/convert_bbox_reg_normalized_model.py.'\n        )\n\n\ndef get_group_gn(dim):\n    \"\"\"\n    get number of groups used by GroupNorm, based on number of channels\n    \"\"\"\n    dim_per_gp = cfg.GROUP_NORM.DIM_PER_GP\n    num_groups = cfg.GROUP_NORM.NUM_GROUPS\n\n    assert dim_per_gp == -1 or num_groups == -1, \\\n        \"GroupNorm: can only specify G or C/G.\"\n\n    if dim_per_gp > 0:\n        assert dim % dim_per_gp == 0\n        group_gn = dim // dim_per_gp\n    else:\n        assert dim % num_groups == 0\n        group_gn = num_groups\n    return group_gn\n"
  },
  {
    "path": "detectron/utils/segms.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Functions for interacting with segmentation masks in the COCO format.\n\nThe following terms are used in this module\n    mask: a binary mask encoded as a 2D numpy array\n    segm: a segmentation mask in one of the two COCO formats (polygon or RLE)\n    polygon: COCO's polygon format\n    RLE: COCO's run length encoding format\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport pycocotools.mask as mask_util\n\n# Type used for storing masks in polygon format\n_POLY_TYPE = list\n# Type used for storing masks in RLE format\n_RLE_TYPE = dict\n\n\ndef is_poly(segm):\n    \"\"\"Determine if segm is a polygon. 
Valid segm expected (polygon or RLE).\"\"\"\n    assert isinstance(segm, (_POLY_TYPE, _RLE_TYPE)), \\\n        'Invalid segm type: {}'.format(type(segm))\n    return isinstance(segm, _POLY_TYPE)\n\n\ndef flip_segms(segms, height, width):\n    \"\"\"Left/right flip each mask in a list of masks.\"\"\"\n    def _flip_poly(poly, width):\n        flipped_poly = np.array(poly)\n        flipped_poly[0::2] = width - np.array(poly[0::2]) - 1\n        return flipped_poly.tolist()\n\n    def _flip_rle(rle, height, width):\n        if 'counts' in rle and type(rle['counts']) == list:\n            # Magic RLE format handling painfully discovered by looking at the\n            # COCO API showAnns function.\n            rle = mask_util.frPyObjects([rle], height, width)\n        mask = mask_util.decode(rle)\n        mask = mask[:, ::-1, :]\n        rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))\n        return rle\n\n    flipped_segms = []\n    for segm in segms:\n        if is_poly(segm):\n            # Polygon format\n            flipped_segms.append([_flip_poly(poly, width) for poly in segm])\n        else:\n            # RLE format\n            flipped_segms.append(_flip_rle(segm, height, width))\n    return flipped_segms\n\n\ndef polys_to_mask(polygons, height, width):\n    \"\"\"Convert from the COCO polygon segmentation format to a binary mask\n    encoded as a 2D array of data type numpy.float32. The polygon segmentation\n    is understood to be enclosed inside a height x width image. 
The resulting\n    mask is therefore of shape (height, width).\n    \"\"\"\n    rle = mask_util.frPyObjects(polygons, height, width)\n    mask = np.array(mask_util.decode(rle), dtype=np.float32)\n    # Flatten in case polygons was a list\n    mask = np.sum(mask, axis=2)\n    mask = np.array(mask > 0, dtype=np.float32)\n    return mask\n\n\ndef mask_to_bbox(mask):\n    \"\"\"Compute the tight bounding box of a binary mask.\"\"\"\n    xs = np.where(np.sum(mask, axis=0) > 0)[0]\n    ys = np.where(np.sum(mask, axis=1) > 0)[0]\n\n    if len(xs) == 0 or len(ys) == 0:\n        return None\n\n    x0 = xs[0]\n    x1 = xs[-1]\n    y0 = ys[0]\n    y1 = ys[-1]\n    return np.array((x0, y0, x1, y1), dtype=np.float32)\n\n\ndef polys_to_mask_wrt_box(polygons, box, M):\n    \"\"\"Convert from the COCO polygon segmentation format to a binary mask\n    encoded as a 2D array of data type numpy.float32. The polygon segmentation\n    is understood to be enclosed in the given box and rasterized to an M x M\n    mask. 
The resulting mask is therefore of shape (M, M).\n    \"\"\"\n    w = box[2] - box[0]\n    h = box[3] - box[1]\n\n    w = np.maximum(w, 1)\n    h = np.maximum(h, 1)\n\n    polygons_norm = []\n    for poly in polygons:\n        p = np.array(poly, dtype=np.float32)\n        p[0::2] = (p[0::2] - box[0]) * M / w\n        p[1::2] = (p[1::2] - box[1]) * M / h\n        polygons_norm.append(p)\n\n    rle = mask_util.frPyObjects(polygons_norm, M, M)\n    mask = np.array(mask_util.decode(rle), dtype=np.float32)\n    # Flatten in case polygons was a list\n    mask = np.sum(mask, axis=2)\n    mask = np.array(mask > 0, dtype=np.float32)\n    return mask\n\n\ndef polys_to_boxes(polys):\n    \"\"\"Convert a list of polygons into an array of tight bounding boxes.\"\"\"\n    boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)\n    for i in range(len(polys)):\n        poly = polys[i]\n        x0 = min(min(p[::2]) for p in poly)\n        x1 = max(max(p[::2]) for p in poly)\n        y0 = min(min(p[1::2]) for p in poly)\n        y1 = max(max(p[1::2]) for p in poly)\n        boxes_from_polys[i, :] = [x0, y0, x1, y1]\n\n    return boxes_from_polys\n\n\ndef rle_mask_voting(\n    top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'\n):\n    \"\"\"Returns new masks (in correspondence with `top_masks`) by combining\n    multiple overlapping masks coming from the pool of `all_masks`. 
Two methods\n    for combining masks are supported: 'AVG' uses a weighted average of\n    overlapping mask pixels; 'UNION' takes the union of all mask pixels.\n    \"\"\"\n    if len(top_masks) == 0:\n        return\n\n    all_not_crowd = [False] * len(all_masks)\n    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)\n    decoded_all_masks = [\n        np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks\n    ]\n    decoded_top_masks = [\n        np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks\n    ]\n    all_boxes = all_dets[:, :4].astype(np.int32)\n    all_scores = all_dets[:, 4]\n\n    # Fill box support with weights\n    mask_shape = decoded_all_masks[0].shape\n    mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))\n    for k in range(len(all_masks)):\n        ref_box = all_boxes[k]\n        x_0 = max(ref_box[0], 0)\n        x_1 = min(ref_box[2] + 1, mask_shape[1])\n        y_0 = max(ref_box[1], 0)\n        y_1 = min(ref_box[3] + 1, mask_shape[0])\n        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]\n    mask_weights = np.maximum(mask_weights, 1e-5)\n\n    top_segms_out = []\n    for k in range(len(top_masks)):\n        # Corner case of empty mask\n        if decoded_top_masks[k].sum() == 0:\n            top_segms_out.append(top_masks[k])\n            continue\n\n        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]\n        # Only matches itself\n        if len(inds_to_vote) == 1:\n            top_segms_out.append(top_masks[k])\n            continue\n\n        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]\n        if method == 'AVG':\n            ws = mask_weights[inds_to_vote]\n            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)\n            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)\n        elif method == 'UNION':\n            # Any pixel that's on joins the mask\n            soft_mask = 
np.sum(masks_to_vote, axis=0)\n            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)\n        else:\n            raise NotImplementedError('Method {} is unknown'.format(method))\n        rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]\n        top_segms_out.append(rle)\n\n    return top_segms_out\n\n\ndef rle_mask_nms(masks, dets, thresh, mode='IOU'):\n    \"\"\"Performs greedy non-maximum suppression based on an overlap measurement\n    between masks. The type of measurement is determined by `mode` and can be\n    either 'IOU' (standard intersection over union) or 'IOMA' (intersection over\n    mininum area).\n    \"\"\"\n    if len(masks) == 0:\n        return []\n    if len(masks) == 1:\n        return [0]\n\n    if mode == 'IOU':\n        # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2))\n        all_not_crowds = [False] * len(masks)\n        ious = mask_util.iou(masks, masks, all_not_crowds)\n    elif mode == 'IOMA':\n        # Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2))\n        all_crowds = [True] * len(masks)\n        # ious[m1, m2] = area(intersect(m1, m2)) / area(m2)\n        ious = mask_util.iou(masks, masks, all_crowds)\n        # ... 
= max(area(intersect(m1, m2)) / area(m2),\n        #           area(intersect(m2, m1)) / area(m1))\n        ious = np.maximum(ious, ious.transpose())\n    elif mode == 'CONTAINMENT':\n        # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2)\n        # Which measures how much m2 is contained inside m1\n        all_crowds = [True] * len(masks)\n        ious = mask_util.iou(masks, masks, all_crowds)\n    else:\n        raise NotImplementedError('Mode {} is unknown'.format(mode))\n\n    scores = dets[:, 4]\n    order = np.argsort(-scores)\n\n    keep = []\n    while order.size > 0:\n        i = order[0]\n        keep.append(i)\n        ovr = ious[i, order[1:]]\n        inds_to_keep = np.where(ovr <= thresh)[0]\n        order = order[inds_to_keep + 1]\n\n    return keep\n\n\ndef rle_masks_to_boxes(masks):\n    \"\"\"Computes the bounding box of each mask in a list of RLE encoded masks.\"\"\"\n    if len(masks) == 0:\n        return []\n\n    decoded_masks = [\n        np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks\n    ]\n\n    def get_bounds(flat_mask):\n        inds = np.where(flat_mask > 0)[0]\n        return inds.min(), inds.max()\n\n    boxes = np.zeros((len(decoded_masks), 4))\n    keep = [True] * len(decoded_masks)\n    for i, mask in enumerate(decoded_masks):\n        if mask.sum() == 0:\n            keep[i] = False\n            continue\n        flat_mask = mask.sum(axis=0)\n        x0, x1 = get_bounds(flat_mask)\n        flat_mask = mask.sum(axis=1)\n        y0, y1 = get_bounds(flat_mask)\n        boxes[i, :] = (x0, y0, x1, y1)\n\n    return boxes, np.where(keep)[0]\n"
  },
  {
    "path": "detectron/utils/subprocess.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Primitives for running multiple single-GPU jobs in parallel over subranges of\ndata. These are used for running multi-GPU inference. Subprocesses are used to\navoid the GIL since inference may involve non-trivial amounts of Python code.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport os\nimport numpy as np\nimport subprocess\nfrom six.moves import shlex_quote\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.io import load_object\nimport detectron.utils.env as envu\n\nimport logging\nlogger = logging.getLogger(__name__)\n\n\ndef process_in_parallel(\n    tag, total_range_size, binary, output_dir, opts=''\n):\n    \"\"\"Run the specified binary cfg.NUM_GPUS times in parallel, each time as a\n    subprocess that uses one GPU. 
The binary must accept the command line\n    arguments `--range {start} {end}` that specify a data processing range.\n    \"\"\"\n    # Snapshot the current cfg state in order to pass to the inference\n    # subprocesses\n    cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag))\n    with open(cfg_file, 'w') as f:\n        envu.yaml_dump(cfg, stream=f)\n    subprocess_env = os.environ.copy()\n    processes = []\n    subinds = np.array_split(range(total_range_size), cfg.NUM_GPUS)\n    # Determine GPUs to use\n    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')\n    if cuda_visible_devices:\n        gpu_inds = map(int, cuda_visible_devices.split(','))\n        assert -1 not in gpu_inds, \\\n            'Hiding GPU indices using the \\'-1\\' index is not supported'\n    else:\n        gpu_inds = range(cfg.NUM_GPUS)\n    # Run the binary in cfg.NUM_GPUS subprocesses\n    for i, gpu_ind in enumerate(gpu_inds):\n        start = subinds[i][0]\n        end = subinds[i][-1] + 1\n        subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind)\n        cmd = '{binary} --range {start} {end} --cfg {cfg_file} NUM_GPUS 1 {opts}'\n        cmd = cmd.format(\n            binary=shlex_quote(binary),\n            start=int(start),\n            end=int(end),\n            cfg_file=shlex_quote(cfg_file),\n            opts=' '.join([shlex_quote(opt) for opt in opts])\n        )\n        logger.info('{} range command {}: {}'.format(tag, i, cmd))\n        if i == 0:\n            subprocess_stdout = subprocess.PIPE\n        else:\n            filename = os.path.join(\n                output_dir, '%s_range_%s_%s.stdout' % (tag, start, end)\n            )\n            subprocess_stdout = open(filename, 'w')  # NOQA (close below)\n        p = subprocess.Popen(\n            cmd,\n            shell=True,\n            env=subprocess_env,\n            stdout=subprocess_stdout,\n            stderr=subprocess.STDOUT,\n            bufsize=1\n        )\n        
processes.append((i, p, start, end, subprocess_stdout))\n    # Log output from inference processes and collate their results\n    outputs = []\n    for i, p, start, end, subprocess_stdout in processes:\n        log_subprocess_output(i, p, output_dir, tag, start, end)\n        if i > 0:\n            subprocess_stdout.close()\n        range_file = os.path.join(\n            output_dir, '%s_range_%s_%s.pkl' % (tag, start, end)\n        )\n        range_data = load_object(range_file)\n        outputs.append(range_data)\n    return outputs\n\n\ndef log_subprocess_output(i, p, output_dir, tag, start, end):\n    \"\"\"Capture the output of each subprocess and log it in the parent process.\n    The first subprocess's output is logged in realtime. The output from the\n    other subprocesses is buffered and then printed all at once (in order) when\n    subprocesses finish.\n    \"\"\"\n    outfile = os.path.join(\n        output_dir, '%s_range_%s_%s.stdout' % (tag, start, end)\n    )\n    logger.info('# ' + '-' * 76 + ' #')\n    logger.info(\n        'stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end)\n    )\n    logger.info('# ' + '-' * 76 + ' #')\n    if i == 0:\n        # Stream the piped stdout from the first subprocess in realtime\n        with open(outfile, 'wb') as f:\n            for line in iter(p.stdout.readline, b''):\n                print(line.rstrip().decode(\"utf8\"))\n                f.write(line)\n        p.stdout.close()\n        ret = p.wait()\n    else:\n        # For subprocesses >= 1, wait and dump their log file\n        ret = p.wait()\n        with open(outfile, 'r') as f:\n            print(''.join(f.readlines()))\n    assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret)\n"
  },
  {
    "path": "detectron/utils/timer.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Timing related functions.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport time\n\n\nclass Timer:\n    \"\"\"A simple timer.\"\"\"\n\n    def __init__(self):\n        self.reset()\n\n    def tic(self):\n        # using time.time instead of time.clock because time time.clock\n        # does not normalize for multithreading\n        self.start_time = time.time()\n\n    def toc(self, average=True):\n        self.diff = time.time() - self.start_time\n        self.total_time += self.diff\n        self.calls += 1\n        self.average_time = self.total_time / self.calls\n        if average:\n            return self.average_time\n        else:\n            return self.diff\n\n    def reset(self):\n        self.total_time = 0.\n        self.calls = 0\n        self.start_time = 0.\n        self.diff = 0.\n        self.average_time = 0.\n"
  },
  {
    "path": "detectron/utils/train.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Utilities driving the train_net binary\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom shutil import copyfile\nimport cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)\nimport logging\nimport numpy as np\nimport os\nimport re\n\nfrom caffe2.python import memonger\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import cfg\nfrom detectron.core.config import get_output_dir\nfrom detectron.datasets.roidb import combined_roidb_for_training\nfrom detectron.modeling import model_builder\nfrom detectron.utils import lr_policy\nfrom detectron.utils.training_stats import TrainingStats\nimport detectron.utils.env as envu\nimport detectron.utils.net as nu\n\n\ndef train_model():\n    \"\"\"Model training loop.\"\"\"\n    model, weights_file, start_iter, checkpoints, output_dir = create_model()\n    if 'final' in checkpoints:\n        # The final model was 
found in the output directory, so nothing to do\n        return checkpoints\n\n    setup_model_for_training(model, weights_file, output_dir)\n    training_stats = TrainingStats(model)\n    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)\n\n    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):\n        if model.roi_data_loader.has_stopped():\n            handle_critical_error(model, 'roi_data_loader failed')\n        training_stats.IterTic()\n        lr = model.UpdateWorkspaceLr(cur_iter, lr_policy.get_lr_at_iter(cur_iter))\n        workspace.RunNet(model.net.Proto().name)\n        if cur_iter == start_iter:\n            nu.print_net(model)\n        training_stats.IterToc()\n        training_stats.UpdateIterStats()\n        training_stats.LogIterStats(cur_iter, lr)\n\n        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:\n            checkpoints[cur_iter] = os.path.join(\n                output_dir, 'model_iter{}.pkl'.format(cur_iter)\n            )\n            nu.save_model_to_weights_file(checkpoints[cur_iter], model)\n\n        if cur_iter == start_iter + training_stats.LOG_PERIOD:\n            # Reset the iteration timer to remove outliers from the first few\n            # SGD iterations\n            training_stats.ResetIterTimer()\n\n        if np.isnan(training_stats.iter_total_loss):\n            handle_critical_error(model, 'Loss is NaN')\n\n    # Save the final model\n    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')\n    nu.save_model_to_weights_file(checkpoints['final'], model)\n    # Shutdown data loading threads\n    model.roi_data_loader.shutdown()\n    return checkpoints\n\n\ndef handle_critical_error(model, msg):\n    logger = logging.getLogger(__name__)\n    logger.critical(msg)\n    model.roi_data_loader.shutdown()\n    raise Exception(msg)\n\n\ndef create_model():\n    \"\"\"Build the model and look for saved model checkpoints in case we can\n    resume from one.\n    \"\"\"\n    
logger = logging.getLogger(__name__)\n    start_iter = 0\n    checkpoints = {}\n    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)\n    weights_file = cfg.TRAIN.WEIGHTS\n    if cfg.TRAIN.AUTO_RESUME:\n        # Check for the final model (indicates training already finished)\n        final_path = os.path.join(output_dir, 'model_final.pkl')\n        if os.path.exists(final_path):\n            logger.info('model_final.pkl exists; no need to train!')\n            return None, None, None, {'final': final_path}, output_dir\n\n        if cfg.TRAIN.COPY_WEIGHTS:\n            copyfile(\n                weights_file,\n                os.path.join(output_dir, os.path.basename(weights_file)))\n            logger.info('Copy {} to {}'.format(weights_file, output_dir))\n\n        # Find the most recent checkpoint (highest iteration number)\n        files = os.listdir(output_dir)\n        for f in files:\n            iter_string = re.findall(r'(?<=model_iter)\\d+(?=\\.pkl)', f)\n            if len(iter_string) > 0:\n                checkpoint_iter = int(iter_string[0])\n                if checkpoint_iter > start_iter:\n                    # Start one iteration immediately after the checkpoint iter\n                    start_iter = checkpoint_iter + 1\n                    resume_weights_file = f\n\n        if start_iter > 0:\n            # Override the initialization weights with the found checkpoint\n            weights_file = os.path.join(output_dir, resume_weights_file)\n            logger.info(\n                '========> Resuming from checkpoint {} at start iter {}'.\n                format(weights_file, start_iter)\n            )\n\n    logger.info('Building model: {}'.format(cfg.MODEL.TYPE))\n    model = model_builder.create(cfg.MODEL.TYPE, train=True)\n    if cfg.MEMONGER:\n        optimize_memory(model)\n    # Performs random weight initialization as defined by the model\n    workspace.RunNetOnce(model.param_init_net)\n    return model, weights_file, 
start_iter, checkpoints, output_dir\n\n\ndef optimize_memory(model):\n    \"\"\"Save GPU memory through blob sharing.\"\"\"\n    for device in range(cfg.NUM_GPUS):\n        namescope = 'gpu_{}/'.format(device)\n        losses = [namescope + l for l in model.losses]\n        model.net._net = memonger.share_grad_blobs(\n            model.net,\n            losses,\n            set(model.param_to_grad.values()),\n            namescope,\n            share_activations=cfg.MEMONGER_SHARE_ACTIVATIONS\n        )\n\n\ndef setup_model_for_training(model, weights_file, output_dir):\n    \"\"\"Loaded saved weights and create the network in the C2 workspace.\"\"\"\n    logger = logging.getLogger(__name__)\n    add_model_training_inputs(model)\n\n    if weights_file:\n        # Override random weight initialization with weights from a saved model\n        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)\n    # Even if we're randomly initializing we still need to synchronize\n    # parameters across GPUs\n    nu.broadcast_parameters(model)\n    workspace.CreateNet(model.net)\n\n    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))\n    dump_proto_files(model, output_dir)\n\n    # Start loading mini-batches and enqueuing blobs\n    model.roi_data_loader.register_sigint_handler()\n    model.roi_data_loader.start(prefill=True)\n    return output_dir\n\n\ndef add_model_training_inputs(model):\n    \"\"\"Load the training dataset and attach the training inputs to the model.\"\"\"\n    logger = logging.getLogger(__name__)\n    logger.info('Loading dataset: {}'.format(cfg.TRAIN.DATASETS))\n    roidb = combined_roidb_for_training(\n        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES\n    )\n    logger.info('{:d} roidb entries'.format(len(roidb)))\n    model_builder.add_training_inputs(model, roidb=roidb)\n\n\ndef dump_proto_files(model, output_dir):\n    \"\"\"Save prototxt descriptions of the training network and parameter\n    initialization 
network.\"\"\"\n    with open(os.path.join(output_dir, 'net.pbtxt'), 'w') as fid:\n        fid.write(str(model.net.Proto()))\n    with open(os.path.join(output_dir, 'param_init_net.pbtxt'), 'w') as fid:\n        fid.write(str(model.param_init_net.Proto()))\n"
  },
  {
    "path": "detectron/utils/training_stats.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Utilities for training.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport datetime\nimport numpy as np\n\nfrom caffe2.python import utils as c2_py_utils\n\nfrom detectron.core.config import cfg\nfrom detectron.utils.logging import log_json_stats\nfrom detectron.utils.logging import SmoothedValue\nfrom detectron.utils.timer import Timer\nimport detectron.utils.net as nu\n\n\nclass TrainingStats:\n    \"\"\"Track vital training statistics.\"\"\"\n\n    def __init__(self, model):\n        # Window size for smoothing tracked values (with median filtering)\n        self.WIN_SZ = 20\n        # Output logging period in SGD iterations\n        self.LOG_PERIOD = 20\n        self.smoothed_losses_and_metrics = {\n            key: SmoothedValue(self.WIN_SZ)\n            for key in model.losses + model.metrics\n        }\n        self.losses_and_metrics = {\n            key: 0\n            for key in model.losses + model.metrics\n        }\n        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)\n        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)\n        self.iter_total_loss = np.nan\n        self.iter_timer = Timer()\n  
      self.model = model\n\n    def IterTic(self):\n        self.iter_timer.tic()\n\n    def IterToc(self):\n        return self.iter_timer.toc(average=False)\n\n    def ResetIterTimer(self):\n        self.iter_timer.reset()\n\n    def UpdateIterStats(self):\n        \"\"\"Update tracked iteration statistics.\"\"\"\n        for k in self.losses_and_metrics.keys():\n            if k in self.model.losses:\n                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)\n            else:\n                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)\n        for k, v in self.smoothed_losses_and_metrics.items():\n            v.AddValue(self.losses_and_metrics[k])\n        self.iter_total_loss = np.sum(\n            np.array([self.losses_and_metrics[k] for k in self.model.losses])\n        )\n        self.smoothed_total_loss.AddValue(self.iter_total_loss)\n        self.smoothed_mb_qsize.AddValue(\n            self.model.roi_data_loader._minibatch_queue.qsize()\n        )\n\n    def LogIterStats(self, cur_iter, lr):\n        \"\"\"Log the tracked statistics.\"\"\"\n        if (cur_iter % self.LOG_PERIOD == 0 or\n                cur_iter == cfg.SOLVER.MAX_ITER - 1):\n            stats = self.GetStats(cur_iter, lr)\n            log_json_stats(stats)\n\n    def GetStats(self, cur_iter, lr):\n        eta_seconds = self.iter_timer.average_time * (\n            cfg.SOLVER.MAX_ITER - cur_iter\n        )\n        eta = str(datetime.timedelta(seconds=int(eta_seconds)))\n        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()\n        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])\n        stats = dict(\n            iter=cur_iter,\n            lr=float(lr),\n            time=self.iter_timer.average_time,\n            loss=self.smoothed_total_loss.GetMedianValue(),\n            eta=eta,\n            mb_qsize=int(\n                np.round(self.smoothed_mb_qsize.GetMedianValue())\n            ),\n            mem=int(np.ceil(mem_usage / 1024 / 1024))\n 
       )\n        for k, v in self.smoothed_losses_and_metrics.items():\n            stats[k] = v.GetMedianValue()\n        return stats\n"
  },
  {
    "path": "detectron/utils/vis.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Detection output visualization module.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport cv2\nimport numpy as np\nimport os\n\nimport pycocotools.mask as mask_util\n\nfrom detectron.utils.colormap import colormap\nimport detectron.utils.env as envu\nimport detectron.utils.keypoints as keypoint_utils\n\n# Matplotlib requires certain adjustments in some environments\n# Must happen before importing matplotlib\nenvu.set_up_matplotlib()\nimport matplotlib.pyplot as plt\nfrom matplotlib.patches import Polygon\n\nplt.rcParams['pdf.fonttype'] = 42  # For editing in Adobe Illustrator\n\n\n_GRAY = (218, 227, 218)\n_GREEN = (18, 127, 15)\n_WHITE = (255, 255, 255)\n\n\ndef kp_connections(keypoints):\n    kp_lines = [\n        [keypoints.index('left_eye'), keypoints.index('right_eye')],\n        [keypoints.index('left_eye'), keypoints.index('nose')],\n        [keypoints.index('right_eye'), keypoints.index('nose')],\n        [keypoints.index('right_eye'), keypoints.index('right_ear')],\n        [keypoints.index('left_eye'), keypoints.index('left_ear')],\n        [keypoints.index('right_shoulder'), keypoints.index('right_elbow')],\n        
[keypoints.index('right_elbow'), keypoints.index('right_wrist')],\n        [keypoints.index('left_shoulder'), keypoints.index('left_elbow')],\n        [keypoints.index('left_elbow'), keypoints.index('left_wrist')],\n        [keypoints.index('right_hip'), keypoints.index('right_knee')],\n        [keypoints.index('right_knee'), keypoints.index('right_ankle')],\n        [keypoints.index('left_hip'), keypoints.index('left_knee')],\n        [keypoints.index('left_knee'), keypoints.index('left_ankle')],\n        [keypoints.index('right_shoulder'), keypoints.index('left_shoulder')],\n        [keypoints.index('right_hip'), keypoints.index('left_hip')],\n    ]\n    return kp_lines\n\n\ndef convert_from_cls_format(cls_boxes, cls_segms, cls_keyps):\n    \"\"\"Convert from the class boxes/segms/keyps format generated by the testing\n    code.\n    \"\"\"\n    box_list = [b for b in cls_boxes if len(b) > 0]\n    if len(box_list) > 0:\n        boxes = np.concatenate(box_list)\n    else:\n        boxes = None\n    if cls_segms is not None:\n        segms = [s for slist in cls_segms for s in slist]\n    else:\n        segms = None\n    if cls_keyps is not None:\n        keyps = [k for klist in cls_keyps for k in klist]\n    else:\n        keyps = None\n    classes = []\n    for j in range(len(cls_boxes)):\n        classes += [j] * len(cls_boxes[j])\n    return boxes, segms, keyps, classes\n\n\ndef get_class_string(class_index, score, dataset):\n    class_text = dataset.classes[class_index] if dataset is not None else \\\n        'id{:d}'.format(class_index)\n    return class_text + ' {:0.2f}'.format(score).lstrip('0')\n\n\ndef vis_mask(img, mask, col, alpha=0.4, show_border=True, border_thick=1):\n    \"\"\"Visualizes a single binary mask.\"\"\"\n\n    img = img.astype(np.float32)\n    idx = np.nonzero(mask)\n\n    img[idx[0], idx[1], :] *= 1.0 - alpha\n    img[idx[0], idx[1], :] += alpha * col\n\n    if show_border:\n        contours = cv2.findContours(\n            mask.copy(), 
cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2]\n        cv2.drawContours(img, contours, -1, _WHITE, border_thick, cv2.LINE_AA)\n\n    return img.astype(np.uint8)\n\n\ndef vis_class(img, pos, class_str, font_scale=0.35):\n    \"\"\"Visualizes the class.\"\"\"\n    img = img.astype(np.uint8)\n    x0, y0 = int(pos[0]), int(pos[1])\n    # Compute text size.\n    txt = class_str\n    font = cv2.FONT_HERSHEY_SIMPLEX\n    ((txt_w, txt_h), _) = cv2.getTextSize(txt, font, font_scale, 1)\n    # Place text background.\n    back_tl = x0, y0 - int(1.3 * txt_h)\n    back_br = x0 + txt_w, y0\n    cv2.rectangle(img, back_tl, back_br, _GREEN, -1)\n    # Show text.\n    txt_tl = x0, y0 - int(0.3 * txt_h)\n    cv2.putText(img, txt, txt_tl, font, font_scale, _GRAY, lineType=cv2.LINE_AA)\n    return img\n\n\ndef vis_bbox(img, bbox, thick=1):\n    \"\"\"Visualizes a bounding box.\"\"\"\n    img = img.astype(np.uint8)\n    (x0, y0, w, h) = bbox\n    x1, y1 = int(x0 + w), int(y0 + h)\n    x0, y0 = int(x0), int(y0)\n    cv2.rectangle(img, (x0, y0), (x1, y1), _GREEN, thickness=thick)\n    return img\n\n\ndef vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):\n    \"\"\"Visualizes keypoints (adapted from vis_one_image).\n    kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob).\n    \"\"\"\n    dataset_keypoints, _ = keypoint_utils.get_keypoints()\n    kp_lines = kp_connections(dataset_keypoints)\n\n    # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.\n    cmap = plt.get_cmap('rainbow')\n    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]\n    colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]\n\n    # Perform the drawing on a copy of the image, to allow for blending.\n    kp_mask = np.copy(img)\n\n    # Draw mid shoulder / mid hip first for better visualization.\n    mid_shoulder = (\n        kps[:2, dataset_keypoints.index('right_shoulder')] +\n        kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0\n    sc_mid_shoulder = 
np.minimum(\n        kps[2, dataset_keypoints.index('right_shoulder')],\n        kps[2, dataset_keypoints.index('left_shoulder')])\n    mid_hip = (\n        kps[:2, dataset_keypoints.index('right_hip')] +\n        kps[:2, dataset_keypoints.index('left_hip')]) / 2.0\n    sc_mid_hip = np.minimum(\n        kps[2, dataset_keypoints.index('right_hip')],\n        kps[2, dataset_keypoints.index('left_hip')])\n    nose_idx = dataset_keypoints.index('nose')\n    if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:\n        cv2.line(\n            kp_mask, tuple(mid_shoulder), tuple(kps[:2, nose_idx]),\n            color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)\n    if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:\n        cv2.line(\n            kp_mask, tuple(mid_shoulder), tuple(mid_hip),\n            color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)\n\n    # Draw the keypoints.\n    for l in range(len(kp_lines)):\n        i1 = kp_lines[l][0]\n        i2 = kp_lines[l][1]\n        p1 = kps[0, i1], kps[1, i1]\n        p2 = kps[0, i2], kps[1, i2]\n        if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:\n            cv2.line(\n                kp_mask, p1, p2,\n                color=colors[l], thickness=2, lineType=cv2.LINE_AA)\n        if kps[2, i1] > kp_thresh:\n            cv2.circle(\n                kp_mask, p1,\n                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)\n        if kps[2, i2] > kp_thresh:\n            cv2.circle(\n                kp_mask, p2,\n                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)\n\n    # Blend the keypoints.\n    return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)\n\n\ndef vis_one_image_opencv(\n        im, boxes, segms=None, keypoints=None, thresh=0.9, kp_thresh=2,\n        show_box=False, dataset=None, show_class=False):\n    \"\"\"Constructs a numpy array with the detections visualized.\"\"\"\n\n    if isinstance(boxes, 
list):\n        boxes, segms, keypoints, classes = convert_from_cls_format(\n            boxes, segms, keypoints)\n\n    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:\n        return im\n\n    if segms is not None and len(segms) > 0:\n        masks = mask_util.decode(segms)\n        color_list = colormap()\n        mask_color_id = 0\n\n    # Display in largest to smallest order to reduce occlusion\n    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])\n    sorted_inds = np.argsort(-areas)\n\n    for i in sorted_inds:\n        bbox = boxes[i, :4]\n        score = boxes[i, -1]\n        if score < thresh:\n            continue\n\n        # show box (off by default)\n        if show_box:\n            im = vis_bbox(\n                im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]))\n\n        # show class (off by default)\n        if show_class:\n            class_str = get_class_string(classes[i], score, dataset)\n            im = vis_class(im, (bbox[0], bbox[1] - 2), class_str)\n\n        # show mask\n        if segms is not None and len(segms) > i:\n            color_mask = color_list[mask_color_id % len(color_list), 0:3]\n            mask_color_id += 1\n            im = vis_mask(im, masks[..., i], color_mask)\n\n        # show keypoints\n        if keypoints is not None and len(keypoints) > i:\n            im = vis_keypoints(im, keypoints[i], kp_thresh)\n\n    return im\n\n\ndef vis_one_image(\n        im, im_name, output_dir, boxes, segms=None, keypoints=None, thresh=0.9,\n        kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False,\n        ext='pdf', out_when_no_box=False):\n    \"\"\"Visual debugging of detections.\"\"\"\n    if not os.path.exists(output_dir):\n        os.makedirs(output_dir)\n\n    if isinstance(boxes, list):\n        boxes, segms, keypoints, classes = convert_from_cls_format(\n            boxes, segms, keypoints)\n\n    if (boxes is None or boxes.shape[0] == 0 or max(boxes[:, 
4]) < thresh) and not out_when_no_box:\n        return\n\n    dataset_keypoints, _ = keypoint_utils.get_keypoints()\n\n    if segms is not None and len(segms) > 0:\n        masks = mask_util.decode(segms)\n\n    color_list = colormap(rgb=True) / 255\n\n    kp_lines = kp_connections(dataset_keypoints)\n    cmap = plt.get_cmap('rainbow')\n    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]\n\n    fig = plt.figure(frameon=False)\n    fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)\n    ax = plt.Axes(fig, [0., 0., 1., 1.])\n    ax.axis('off')\n    fig.add_axes(ax)\n    ax.imshow(im)\n\n    if boxes is None:\n        sorted_inds = [] # avoid crash when 'boxes' is None\n    else:\n        # Display in largest to smallest order to reduce occlusion\n        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])\n        sorted_inds = np.argsort(-areas)\n\n    mask_color_id = 0\n    for i in sorted_inds:\n        bbox = boxes[i, :4]\n        score = boxes[i, -1]\n        if score < thresh:\n            continue\n\n        # show box (off by default)\n        ax.add_patch(\n            plt.Rectangle((bbox[0], bbox[1]),\n                          bbox[2] - bbox[0],\n                          bbox[3] - bbox[1],\n                          fill=False, edgecolor='g',\n                          linewidth=0.5, alpha=box_alpha))\n\n        if show_class:\n            ax.text(\n                bbox[0], bbox[1] - 2,\n                get_class_string(classes[i], score, dataset),\n                fontsize=3,\n                family='serif',\n                bbox=dict(\n                    facecolor='g', alpha=0.4, pad=0, edgecolor='none'),\n                color='white')\n\n        # show mask\n        if segms is not None and len(segms) > i:\n            img = np.ones(im.shape)\n            color_mask = color_list[mask_color_id % len(color_list), 0:3]\n            mask_color_id += 1\n\n            w_ratio = .4\n            for c in range(3):\n      
          color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio\n            for c in range(3):\n                img[:, :, c] = color_mask[c]\n            e = masks[:, :, i]\n\n            contour = cv2.findContours(\n                e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2]\n\n            for c in contour:\n                polygon = Polygon(\n                    c.reshape((-1, 2)),\n                    fill=True, facecolor=color_mask,\n                    edgecolor='w', linewidth=1.2,\n                    alpha=0.5)\n                ax.add_patch(polygon)\n\n        # show keypoints\n        if keypoints is not None and len(keypoints) > i:\n            kps = keypoints[i]\n            plt.autoscale(False)\n            for l in range(len(kp_lines)):\n                i1 = kp_lines[l][0]\n                i2 = kp_lines[l][1]\n                if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:\n                    x = [kps[0, i1], kps[0, i2]]\n                    y = [kps[1, i1], kps[1, i2]]\n                    line = plt.plot(x, y)\n                    plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)\n                if kps[2, i1] > kp_thresh:\n                    plt.plot(\n                        kps[0, i1], kps[1, i1], '.', color=colors[l],\n                        markersize=3.0, alpha=0.7)\n\n                if kps[2, i2] > kp_thresh:\n                    plt.plot(\n                        kps[0, i2], kps[1, i2], '.', color=colors[l],\n                        markersize=3.0, alpha=0.7)\n\n            # add mid shoulder / mid hip for better visualization\n            mid_shoulder = (\n                kps[:2, dataset_keypoints.index('right_shoulder')] +\n                kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0\n            sc_mid_shoulder = np.minimum(\n                kps[2, dataset_keypoints.index('right_shoulder')],\n                kps[2, dataset_keypoints.index('left_shoulder')])\n            mid_hip = (\n                
kps[:2, dataset_keypoints.index('right_hip')] +\n                kps[:2, dataset_keypoints.index('left_hip')]) / 2.0\n            sc_mid_hip = np.minimum(\n                kps[2, dataset_keypoints.index('right_hip')],\n                kps[2, dataset_keypoints.index('left_hip')])\n            if (sc_mid_shoulder > kp_thresh and\n                    kps[2, dataset_keypoints.index('nose')] > kp_thresh):\n                x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]\n                y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]\n                line = plt.plot(x, y)\n                plt.setp(\n                    line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7)\n            if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:\n                x = [mid_shoulder[0], mid_hip[0]]\n                y = [mid_shoulder[1], mid_hip[1]]\n                line = plt.plot(x, y)\n                plt.setp(\n                    line, color=colors[len(kp_lines) + 1], linewidth=1.0,\n                    alpha=0.7)\n\n    output_name = os.path.basename(im_name) + '.' + ext\n    fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi)\n    plt.close('all')\n"
  },
  {
    "path": "docker/Dockerfile",
    "content": "# Use Caffe2 image as parent image\nFROM caffe2/caffe2:snapshot-py2-cuda9.0-cudnn7-ubuntu16.04\n\nRUN mv /usr/local/caffe2 /usr/local/caffe2_build\nENV Caffe2_DIR /usr/local/caffe2_build\n\nENV PYTHONPATH /usr/local/caffe2_build:${PYTHONPATH}\nENV LD_LIBRARY_PATH /usr/local/caffe2_build/lib:${LD_LIBRARY_PATH}\n\n# Clone the Detectron repository\nRUN git clone https://github.com/facebookresearch/detectron /detectron\n\n# Install Python dependencies\nRUN pip install -r /detectron/requirements.txt\n\n# Install the COCO API\nRUN git clone https://github.com/cocodataset/cocoapi.git /cocoapi\nWORKDIR /cocoapi/PythonAPI\nRUN make install\n\n# Go to Detectron root\nWORKDIR /detectron\n\n# Set up Python modules\nRUN make\n\n# [Optional] Build custom ops\nRUN make ops\n"
  },
  {
    "path": "projects/GN/README.md",
    "content": "# Group Normalization for Mask R-CNN\n\n<div align=\"center\">\n  <img src=\"gn.jpg\" width=\"700px\" />\n</div>\n\n## Introduction\n\nThis file provides Mask R-CNN baseline results and models trained with [Group Normalization](https://arxiv.org/abs/1803.08494):\n\n```\n@article{GroupNorm2018,\n  title={Group Normalization},\n  author={Yuxin Wu and Kaiming He},\n  journal={arXiv:1803.08494},\n  year={2018}\n}\n```\n\n**Note:** This code uses the GroupNorm op implemented in CUDA, included in the Caffe2 repo. When writing this document, Caffe2 is being merged into PyTorch, and the GroupNorm op is located [here](https://github.com/pytorch/pytorch/blob/master/caffe2/operators/group_norm_op.cu). Make sure your Caffe2 is up to date.\n\n## Pretrained Models with GN\n\nThese models are trained in Caffe2 on the standard ImageNet-1k dataset, using GroupNorm with 32 groups (G=32).\n\n- [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl): ResNet-50 with GN, 24.0\\% top-1 error (center-crop).\n- [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl): ResNet-101 with GN, 22.6\\% top-1 error (center-crop).\n\n## Results\n\n### Baselines with BN\n\n<table><tbody>\n<!-- START E2E MASK RCNN BN TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we use wrap text in <sup><sub></sub><sup> to make is small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;case&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th 
valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN, BN*</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask R-CNN</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>8.6</sub></sup></td>\n<td align=\"right\"><sup><sub>0.897</sub></sup></td>\n<td align=\"right\"><sup><sub>44.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.099&nbsp;+&nbsp;0.018</sub></sup></td>\n<td align=\"right\"><sup><sub>38.6</sub></sup></td>\n<td align=\"right\"><sup><sub>34.5</sub></sup></td>\n<td align=\"right\"><sup><sub>35859007</sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN, BN*</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask R-CNN</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>10.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.993</sub></sup></td>\n<td align=\"right\"><sup><sub>49.7</sub></sup></td>\n<td align=\"right\"><sup><sub>0.126&nbsp;+&nbsp;0.017</sub></sup></td>\n<td align=\"right\"><sup><sub>40.9</sub></sup></td>\n<td align=\"right\"><sup><sub>36.4</sub></sup></td>\n<td align=\"right\"><sup><sub>35861858</sub></sup></td>\n</tr>\n<!-- END E2E MASK RCNN BN TABLE -->\n</tbody></table>\n\n**Notes:**\n\n- This table is copied from [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).\n- BN<sup>*</sup> means that BatchNorm (BN) is used for pre-training and is frozen and turned into a per-channel linear layer when fine-tuning. 
This is the default of Faster/Mask R-CNN and Detectron.\n\n### Mask R-CNN with GN\n\n#### Standard Mask R-CNN recipe\n<table><tbody>\n<!-- START E2E MASK RCNN GN TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we use wrap text in <sup><sub></sub><sup> to make is small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;case&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN, GN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask R-CNN</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>10.5</sub></sup></td>\n<td align=\"right\"><sup><sub>1.017</sub></sup></td>\n<td align=\"right\"><sup><sub>50.8</sub></sup></td>\n<td align=\"right\"><sup><sub>0.146&nbsp;+&nbsp;0.017</sub></sup></td>\n<td align=\"right\"><sup><sub>40.3</sub></sup></td>\n<td align=\"right\"><sup><sub>35.7</sub></sup></td>\n<td align=\"right\"><sup><sub>48616381</sub></sup></td>\n<td align=\"left\"><sup><sub>\n  <a 
href=\"https://dl.fbaipublicfiles.com/detectron/GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>\n  &nbsp;|&nbsp;\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>\n  &nbsp;|&nbsp;\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN, GN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask R-CNN</sub></sup></td>\n<td align=\"left\"><sup><sub>2x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.4</sub></sup></td>\n<td align=\"right\"><sup><sub>1.151</sub></sup></td>\n<td align=\"right\"><sup><sub>57.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.180&nbsp;+&nbsp;0.015</sub></sup></td>\n<td align=\"right\"><sup><sub>41.8</sub></sup></td>\n<td align=\"right\"><sup><sub>36.8</sub></sup></td>\n<td align=\"right\"><sup><sub>48616724</sub></sup></td>\n<td align=\"left\"><sup><sub>\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48616724/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn_0416.13_26_34.GLnri4GR/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>\n  &nbsp;|&nbsp;\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48616724/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn_0416.13_26_34.GLnri4GR/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>\n  &nbsp;|&nbsp;\n  <a 
href=\"https://dl.fbaipublicfiles.com/detectron/GN/48616724/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn_0416.13_26_34.GLnri4GR/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<!-- END E2E MASK RCNN GN TABLE -->\n</tbody></table>\n\n**Notes:**\n- GN is applied to: (i) ResNet layers inherited from pre-training, (ii) the FPN-specific layers, (iii) the RoI bbox head, and (iv) the RoI mask head.\n- These GN models use a 4conv+1fc RoI box head. The BN<sup>*</sup> counterpart with this head performs similarly to the default 2fc head: using this codebase, R-50-FPN BN<sup>\\*</sup> with 4conv+1fc has 38.8/34.4 box/mask AP.\n- 2x is the default schedule (180k iterations) in Detectron.\n\n#### Longer training schedule\n<table><tbody>\n<!-- START E2E MASK RCNN GN 3X TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;case&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>download<br/>links</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN, GN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask 
R-CNN</sub></sup></td>\n<td align=\"left\"><sup><sub><b>3x</b></sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>10.5</sub></sup></td>\n<td align=\"right\"><sup><sub>1.033</sub></sup></td>\n<td align=\"right\"><sup><sub>77.4</sub></sup></td>\n<td align=\"right\"><sup><sub>0.145&nbsp;+&nbsp;0.015</sub></sup></td>\n<td align=\"right\"><sup><sub>40.8</sub></sup></td>\n<td align=\"right\"><sup><sub>36.1</sub></sup></td>\n<td align=\"right\"><sup><sub>48734751</sub></sup></td>\n<td align=\"left\"><sup><sub>\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48734751/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn_0417.09_54_59.nwCTtPVk/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>\n  &nbsp;|&nbsp;\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48734751/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn_0417.09_54_59.nwCTtPVk/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>\n  &nbsp;|&nbsp;\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48734751/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn_0417.09_54_59.nwCTtPVk/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN, GN</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask R-CNN</sub></sup></td>\n<td align=\"left\"><sup><sub><b>3x</b></sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.4</sub></sup></td>\n<td align=\"right\"><sup><sub>1.171</sub></sup></td>\n<td align=\"right\"><sup><sub>87.9</sub></sup></td>\n<td align=\"right\"><sup><sub>0.180&nbsp;+&nbsp;0.014</sub></sup></td>\n<td align=\"right\"><sup><sub>42.3</sub></sup></td>\n<td align=\"right\"><sup><sub>37.2</sub></sup></td>\n<td align=\"right\"><sup><sub>48734779</sub></sup></td>\n<td align=\"left\"><sup><sub>\n  <a 
href=\"https://dl.fbaipublicfiles.com/detectron/GN/48734779/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn_0417.09_55_23.HMtcR8wg/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\">model</a>\n  &nbsp;|&nbsp;\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48734779/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn_0417.09_55_23.HMtcR8wg/output/test/coco_2014_minival/generalized_rcnn/bbox_coco_2014_minival_results.json\">boxes</a>\n  &nbsp;|&nbsp;\n  <a href=\"https://dl.fbaipublicfiles.com/detectron/GN/48734779/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn_0417.09_55_23.HMtcR8wg/output/test/coco_2014_minival/generalized_rcnn/segmentations_coco_2014_minival_results.json\">masks</a></sub></sup></td>\n</tr>\n<!-- END E2E MASK RCNN GN 3X TABLE -->\n</tbody></table>\n\n**Notes:**\n- 3x is a longer schedule (270k iterations). GN can improve further when using the longer schedule, but its BN<sup>*</sup> counterpart remains similar (R-50-FPN BN<sup>\\*</sup>: 38.9/34.3) with the longer schedule.\n- These models are trained **without** any scale augmentation, which can further [improve results](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#mask-r-cnn-with-bells--whistles).\n\n\n### Explorations\n\n#### Training Mask R-CNN from scratch\n\nGN makes it possible to train Mask R-CNN *from scratch* without ImageNet pre-training, despite the small batch size.\n\n<table><tbody>\n<!-- START E2E MASK RCNN GN SCRATCH TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we wrap text in <sup><sub></sub></sup> to make it small -->\n<th valign=\"bottom\"><sup><sub>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;case&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>type</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>lr<br/>schd</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>im/<br/>gpu</sub></sup></th>\n<th 
valign=\"bottom\"><sup><sub>train<br/>mem<br/>(GB)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>(s/iter)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>train<br/>time<br/>total<br/>(hr)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>inference<br/>time<br/>(s/im)</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>box<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>mask<br/>AP</sub></sup></th>\n<th valign=\"bottom\"><sup><sub>model id</sub></sup></th>\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub>R-50-FPN, GN, scratch</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask R-CNN</sub></sup></td>\n<td align=\"left\"><sup><sub>3x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>10.8</sub></sup></td>\n<td align=\"right\"><sup><sub>1.087</sub></sup></td>\n<td align=\"right\"><sup><sub>81.5</sub></sup></td>\n<td align=\"right\"><sup><sub>0.140&nbsp;+&nbsp;0.019</sub></sup></td>\n<td align=\"right\"><sup><sub>39.5</sub></sup></td>\n<td align=\"right\"><sup><sub>35.2</sub></sup></td>\n<td align=\"right\"><sup><sub>56421872</sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub>R-101-FPN, GN, scratch</sub></sup></td>\n<td align=\"left\"><sup><sub>Mask R-CNN</sub></sup></td>\n<td align=\"left\"><sup><sub>3x</sub></sup></td>\n<td align=\"right\"><sup><sub>2</sub></sup></td>\n<td align=\"right\"><sup><sub>12.7</sub></sup></td>\n<td align=\"right\"><sup><sub>1.243</sub></sup></td>\n<td align=\"right\"><sup><sub>93.2</sub></sup></td>\n<td align=\"right\"><sup><sub>0.177&nbsp;+&nbsp;0.019</sub></sup></td>\n<td align=\"right\"><sup><sub>41.0</sub></sup></td>\n<td align=\"right\"><sup><sub>36.4</sub></sup></td>\n<td align=\"right\"><sup><sub>56421911</sub></sup></td>\n</tr>\n<!-- END E2E MASK RCNN GN SCRATCH TABLE -->\n</tbody></table>\n\n**Notes:**\n- To reproduce these results, see the config yaml files starting with ```scratch ```.\n- These are results using ```freeze_at=0```. 
See this [commit](https://github.com/facebookresearch/Detectron/commit/f8ffc87ca442d8f6bd2b9aad11029b5db56d7260) about the related issue.\n\n&nbsp;\n\n<table><tbody>\n<!-- START E2E MASK RCNN GN SCRATCH TABLE -->\n<!-- TABLE HEADER -->\n<!-- Info: we use wrap text in <sup><sub></sub><sup> to make is small -->\n<!-- TABLE BODY -->\n<tr>\n<td align=\"left\"><sup><sub><s>R-50-FPN, GN, scratch</s></sub></sup></td>\n<td align=\"left\"><sup><sub><s>Mask R-CNN</s></sub></sup></td>\n<td align=\"left\"><sup><sub><s>3x</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>2</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>10.5</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>0.990</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>74.3</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>0.146&nbsp;+&nbsp;0.020</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>36.2</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>32.5</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>49025460</s></sub></sup></td>\n</tr>\n<tr>\n<td align=\"left\"><sup><sub><s>R-101-FPN, GN, scratch</s></sub></sup></td>\n<td align=\"left\"><sup><sub><s>Mask R-CNN</s></sub></sup></td>\n<td align=\"left\"><sup><sub><s>3x</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>2</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>12.4</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>1.124</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>84.3</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>0.180&nbsp;+&nbsp;0.019</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>37.5</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>33.3</s></sub></sup></td>\n<td align=\"right\"><sup><sub><s>49024951</s></sub></sup></td>\n</tr>\n<!-- END E2E MASK RCNN GN SCRATCH TABLE -->\n</tbody></table>\n\n**Notes:**\n- These are early results that followed the default training using ```freeze_at=2```. 
This means the conv1 and res2 layers were frozen at their random initial weights when training from scratch. See this [commit](https://github.com/facebookresearch/Detectron/commit/f8ffc87ca442d8f6bd2b9aad11029b5db56d7260) for the related issue.\n"
  },
  {
    "path": "requirements.txt",
    "content": "numpy>=1.13\npyyaml==3.12\nmatplotlib\nopencv-python>=3.2\nsetuptools\nCython\nmock\nscipy\nsix\nfuture\nprotobuf\n"
  },
  {
    "path": "setup.py",
    "content": "# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom Cython.Build import cythonize\nfrom setuptools import Extension\nfrom setuptools import setup\n\nimport numpy as np\n\n_NP_INCLUDE_DIRS = np.get_include()\n\n\n# Extension modules\next_modules = [\n    Extension(\n        name='detectron.utils.cython_bbox',\n        sources=[\n            'detectron/utils/cython_bbox.pyx'\n        ],\n        extra_compile_args=[\n            '-Wno-cpp'\n        ],\n        include_dirs=[\n            _NP_INCLUDE_DIRS\n        ]\n    ),\n    Extension(\n        name='detectron.utils.cython_nms',\n        sources=[\n            'detectron/utils/cython_nms.pyx'\n        ],\n        extra_compile_args=[\n            '-Wno-cpp'\n        ],\n        include_dirs=[\n            _NP_INCLUDE_DIRS\n        ]\n    )\n]\n\nsetup(\n    name='Detectron',\n    packages=['detectron'],\n    ext_modules=cythonize(ext_modules)\n)\n"
  },
  {
    "path": "tools/convert_cityscapes_to_coco.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport h5py\nimport json\nimport os\nimport imageio\nimport sys\n\nimport cityscapesscripts.evaluation.instances2dict_with_polygons as cs\n\nimport detectron.utils.segms as segms_util\nimport detectron.utils.boxes as bboxs_util\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Convert dataset')\n    parser.add_argument(\n        '--dataset', help=\"cocostuff, cityscapes\", default=None, type=str)\n    parser.add_argument(\n        '--outdir', help=\"output dir for json files\", default=None, type=str)\n    parser.add_argument(\n        '--datadir', help=\"data dir for annotations to be converted\",\n        default=None, type=str)\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    return parser.parse_args()\n\n\ndef convert_coco_stuff_mat(data_dir, out_dir):\n    \"\"\"Convert to png and save json with path. 
This currently only contains\n    the segmentation labels for objects+stuff in cocostuff - if we need to\n    combine with other labels from original COCO that will be a TODO.\"\"\"\n    sets = ['train', 'val']\n    categories = []\n    json_name = 'coco_stuff_%s.json'\n    ann_dict = {}\n    for data_set in sets:\n        file_list = os.path.join(data_dir, '%s.txt')\n        images = []\n        with open(file_list % data_set) as f:\n            for img_id, img_name in enumerate(f):\n                img_name = img_name.replace('coco', 'COCO').strip('\\n')\n                image = {}\n                mat_file = os.path.join(\n                    data_dir, 'annotations/%s.mat' % img_name)\n                data = h5py.File(mat_file, 'r')\n                labelMap = data.get('S')\n                if len(categories) == 0:\n                    labelNames = data.get('names')\n                    for idx, n in enumerate(labelNames):\n                        categories.append(\n                            {\"id\": idx, \"name\": ''.join(chr(i) for i in data[\n                                n[0]])})\n                    ann_dict['categories'] = categories\n                imageio.imsave(\n                    os.path.join(data_dir, img_name + '.png'), labelMap)\n                image['width'] = labelMap.shape[0]\n                image['height'] = labelMap.shape[1]\n                image['file_name'] = img_name\n                image['seg_file_name'] = img_name\n                image['id'] = img_id\n                images.append(image)\n        ann_dict['images'] = images\n        print(\"Num images: %s\" % len(images))\n        with open(os.path.join(out_dir, json_name % data_set), 'wb') as outfile:\n            outfile.write(json.dumps(ann_dict))\n\n\n# for Cityscapes\ndef getLabelID(self, instID):\n    if (instID < 1000):\n        return instID\n    else:\n        return int(instID / 1000)\n\n\ndef convert_cityscapes_instance_only(\n        data_dir, out_dir):\n    
\"\"\"Convert from cityscapes format to COCO instance seg format - polygons\"\"\"\n    sets = [\n        'gtFine_val',\n        # 'gtFine_train',\n        # 'gtFine_test',\n\n        # 'gtCoarse_train',\n        # 'gtCoarse_val',\n        # 'gtCoarse_train_extra'\n    ]\n    ann_dirs = [\n        'gtFine_trainvaltest/gtFine/val',\n        # 'gtFine_trainvaltest/gtFine/train',\n        # 'gtFine_trainvaltest/gtFine/test',\n\n        # 'gtCoarse/train',\n        # 'gtCoarse/train_extra',\n        # 'gtCoarse/val'\n    ]\n    json_name = 'instancesonly_filtered_%s.json'\n    ends_in = '%s_polygons.json'\n    img_id = 0\n    ann_id = 0\n    cat_id = 1\n    category_dict = {}\n\n    category_instancesonly = [\n        'person',\n        'rider',\n        'car',\n        'truck',\n        'bus',\n        'train',\n        'motorcycle',\n        'bicycle',\n    ]\n\n    for data_set, ann_dir in zip(sets, ann_dirs):\n        print('Starting %s' % data_set)\n        ann_dict = {}\n        images = []\n        annotations = []\n        ann_dir = os.path.join(data_dir, ann_dir)\n        for root, _, files in os.walk(ann_dir):\n            for filename in files:\n                if filename.endswith(ends_in % data_set.split('_')[0]):\n                    if len(images) % 50 == 0:\n                        print(\"Processed %s images, %s annotations\" % (\n                            len(images), len(annotations)))\n                    json_ann = json.load(open(os.path.join(root, filename)))\n                    image = {}\n                    image['id'] = img_id\n                    img_id += 1\n\n                    image['width'] = json_ann['imgWidth']\n                    image['height'] = json_ann['imgHeight']\n                    image['file_name'] = filename[:-len(\n                        ends_in % data_set.split('_')[0])] + 'leftImg8bit.png'\n                    image['seg_file_name'] = filename[:-len(\n                        ends_in % data_set.split('_')[0])] + \\\n  
                      '%s_instanceIds.png' % data_set.split('_')[0]\n                    images.append(image)\n\n                    fullname = os.path.join(root, image['seg_file_name'])\n                    objects = cs.instances2dict_with_polygons(\n                        [fullname], verbose=False)[fullname]\n\n                    for object_cls in objects:\n                        if object_cls not in category_instancesonly:\n                            continue  # skip non-instance categories\n\n                        for obj in objects[object_cls]:\n                            if obj['contours'] == []:\n                                print('Warning: empty contours.')\n                                continue  # skip non-instance categories\n\n                            len_p = [len(p) for p in obj['contours']]\n                            if min(len_p) <= 4:\n                                print('Warning: invalid contours.')\n                                continue  # skip non-instance categories\n\n                            ann = {}\n                            ann['id'] = ann_id\n                            ann_id += 1\n                            ann['image_id'] = image['id']\n                            ann['segmentation'] = obj['contours']\n\n                            if object_cls not in category_dict:\n                                category_dict[object_cls] = cat_id\n                                cat_id += 1\n                            ann['category_id'] = category_dict[object_cls]\n                            ann['iscrowd'] = 0\n                            ann['area'] = obj['pixelCount']\n                            ann['bbox'] = bboxs_util.xyxy_to_xywh(\n                                segms_util.polys_to_boxes(\n                                    [ann['segmentation']])).tolist()[0]\n\n                            annotations.append(ann)\n\n        ann_dict['images'] = images\n        categories = [{\"id\": category_dict[name], 
\"name\": name} for name in\n                      category_dict]\n        ann_dict['categories'] = categories\n        ann_dict['annotations'] = annotations\n        print(\"Num categories: %s\" % len(categories))\n        print(\"Num images: %s\" % len(images))\n        print(\"Num annotations: %s\" % len(annotations))\n        with open(os.path.join(out_dir, json_name % data_set), 'wb') as outfile:\n            outfile.write(json.dumps(ann_dict))\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    if args.dataset == \"cityscapes_instance_only\":\n        convert_cityscapes_instance_only(args.datadir, args.outdir)\n    elif args.dataset == \"cocostuff\":\n        convert_coco_stuff_mat(args.datadir, args.outdir)\n    else:\n        print(\"Dataset not supported: %s\" % args.dataset)\n"
  },
  {
    "path": "tools/convert_coco_model_to_cityscapes.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n# Convert a detection model trained for COCO into a model that can be fine-tuned\n# on cityscapes\n#\n# cityscapes_to_coco\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport numpy as np\nimport os\nimport sys\n\nimport detectron.datasets.coco_to_cityscapes_id as cs\nfrom detectron.utils.io import load_object\nfrom detectron.utils.io import save_object\n\nNUM_CS_CLS = 9\nNUM_COCO_CLS = 81\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert a COCO pre-trained model for use with Cityscapes')\n    parser.add_argument(\n        '--coco_model', dest='coco_model_file_name',\n        help='Pretrained network weights file path',\n        default=None, type=str)\n    parser.add_argument(\n        '--convert_func', dest='convert_func',\n        help='Blob conversion function',\n        default='cityscapes_to_coco', type=str)\n    parser.add_argument(\n        '--output', dest='out_file_name',\n        help='Output file path',\n        default=None, type=str)\n\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n\n    args = parser.parse_args()\n    return 
args\n\n\ndef convert_coco_blobs_to_cityscape_blobs(model_dict):\n    for k, v in model_dict['blobs'].items():\n        if v.shape[0] == NUM_COCO_CLS or v.shape[0] == 4 * NUM_COCO_CLS:\n            coco_blob = model_dict['blobs'][k]\n            print(\n                'Converting COCO blob {} with shape {}'.\n                format(k, coco_blob.shape)\n            )\n            cs_blob = convert_coco_blob_to_cityscapes_blob(\n                coco_blob, args.convert_func\n            )\n            print(' -> converted shape {}'.format(cs_blob.shape))\n            model_dict['blobs'][k] = cs_blob\n\n\ndef convert_coco_blob_to_cityscapes_blob(coco_blob, convert_func):\n    # coco blob (81, ...) or (81*4, ...)\n    coco_shape = coco_blob.shape\n    leading_factor = int(coco_shape[0] / NUM_COCO_CLS)\n    tail_shape = list(coco_shape[1:])\n    assert leading_factor == 1 or leading_factor == 4\n\n    # Reshape in [num_classes, ...] form for easier manipulations\n    coco_blob = coco_blob.reshape([NUM_COCO_CLS, -1] + tail_shape)\n    # Default initialization uses Gaussian with mean and std to match the\n    # existing parameters\n    std = coco_blob.std()\n    mean = coco_blob.mean()\n    cs_shape = [NUM_CS_CLS] + list(coco_blob.shape[1:])\n    cs_blob = (np.random.randn(*cs_shape) * std + mean).astype(np.float32)\n\n    # Replace random parameters with COCO parameters if class mapping exists\n    for i in range(NUM_CS_CLS):\n        coco_cls_id = getattr(cs, convert_func)(i)\n        if coco_cls_id >= 0:  # otherwise ignore (rand init)\n            cs_blob[i] = coco_blob[coco_cls_id]\n\n    cs_shape = [NUM_CS_CLS * leading_factor] + tail_shape\n    return cs_blob.reshape(cs_shape)\n\n\ndef remove_momentum(model_dict):\n    for k in model_dict['blobs'].keys():\n        if k.endswith('_momentum'):\n            del model_dict['blobs'][k]\n\n\ndef load_and_convert_coco_model(args):\n    model_dict = load_object(args.coco_model_file_name)\n    remove_momentum(model_dict)\n  
  convert_coco_blobs_to_cityscape_blobs(model_dict)\n    return model_dict\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    print(args)\n    assert os.path.exists(args.coco_model_file_name), \\\n        'Weights file does not exist'\n    weights = load_and_convert_coco_model(args)\n\n    save_object(weights, args.out_file_name)\n    print('Wrote blobs to {}:'.format(args.out_file_name))\n    print(sorted(weights['blobs'].keys()))\n"
  },
  {
    "path": "tools/convert_pkl_to_pb.py",
    "content": "#!/usr/bin/env python3\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Script to convert the model (.yaml and .pkl) trained by train_net to a\nstandard Caffe2 model in pb format (model.pb and model_init.pb). The converted\nmodel is good for production usage, as it could run independently and efficiently\non CPU, GPU and mobile without depending on the detectron codebase.\n\nPlease see Caffe2 tutorial (\nhttps://caffe2.ai/docs/tutorial-loading-pre-trained-models.html) for loading\nthe converted model, and run_model_pb() for running the model for inference.\n\"\"\"\n\nfrom __future__ import absolute_import, division, print_function, unicode_literals\n\nimport argparse\nimport copy\nimport os\nimport pprint\nimport sys\n\nimport caffe2.python.utils as putils\nimport cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)\nimport detectron.core.test_engine as test_engine\nimport detectron.utils.blob as blob_utils\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.model_convert_utils as mutils\nimport detectron.utils.vis as vis_utils\nimport numpy as np\nfrom caffe2.caffe2.fb.predictor import predictor_exporter, predictor_py_utils\nfrom caffe2.proto import caffe2_pb2\nfrom caffe2.python import core, workspace\nfrom caffe2.python.predictor_constants import predictor_constants\nfrom 
detectron.core.config import (\n    assert_and_infer_cfg,\n    cfg,\n    merge_cfg_from_file,\n    merge_cfg_from_list,\n)\nfrom detectron.modeling import generate_anchors\nfrom detectron.utils.logging import setup_logging\nfrom detectron.utils.model_convert_utils import convert_op_in_proto, op_filter\n\n\nc2_utils.import_contrib_ops()\nc2_utils.import_detectron_ops()\n\n# OpenCL may be enabled by default in OpenCV3; disable it because it's not\n# thread safe and causes unwanted GPU memory allocations.\ncv2.ocl.setUseOpenCL(False)\n\nlogger = setup_logging(__name__)\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description=\"Convert a trained network to pb format\"\n    )\n    parser.add_argument(\n        \"--cfg\", dest=\"cfg_file\", help=\"optional config file\", default=None, type=str\n    )\n    parser.add_argument(\n        \"--net_name\",\n        dest=\"net_name\",\n        help=\"optional name for the net\",\n        default=\"detectron\",\n        type=str,\n    )\n    parser.add_argument(\n        \"--out_dir\", dest=\"out_dir\", help=\"output dir\", default=None, type=str\n    )\n    parser.add_argument(\n        \"--test_img\",\n        dest=\"test_img\",\n        help=\"optional test image, used to verify the model conversion\",\n        default=None,\n        type=str,\n    )\n    parser.add_argument(\n        \"--fuse_af\", dest=\"fuse_af\", help=\"1 to fuse_af\", default=1, type=int\n    )\n    parser.add_argument(\n        \"--device\",\n        dest=\"device\",\n        help=\"Device to run the model on\",\n        choices=[\"cpu\", \"gpu\"],\n        default=\"cpu\",\n        type=str,\n    )\n    parser.add_argument(\n        \"--net_execution_type\",\n        dest=\"net_execution_type\",\n        help=\"caffe2 net execution type\",\n        choices=[\"simple\", \"dag\"],\n        default=\"simple\",\n        type=str,\n    )\n    parser.add_argument(\n        \"--use_nnpack\",\n        dest=\"use_nnpack\",\n        
help=\"Use nnpack for conv\",\n        default=1,\n        type=int,\n    )\n    parser.add_argument(\n        \"--logdb\",\n        dest=\"logdb\",\n        help=\"output to logfiledb instead of pb files\",\n        default=0,\n        type=int,\n    )\n    parser.add_argument(\n        \"opts\",\n        help=\"See detectron/core/config.py for all options\",\n        default=None,\n        nargs=argparse.REMAINDER,\n    )\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    ret = parser.parse_args()\n    ret.out_dir = os.path.abspath(ret.out_dir)\n    if ret.device == \"gpu\" and ret.use_nnpack:\n        logger.warn(\"Should not use mobile engine for gpu model.\")\n        ret.use_nnpack = 0\n\n    return ret\n\n\ndef unscope_name(name):\n    return c2_utils.UnscopeName(name)\n\n\ndef reset_names(names):\n    for i in range(len(names)):\n        names[i] = unscope_name(names[i])\n\n\ndef convert_collect_and_distribute(\n    op,\n    blobs,\n    roi_canonical_scale,\n    roi_canonical_level,\n    roi_max_level,\n    roi_min_level,\n    rpn_max_level,\n    rpn_min_level,\n    rpn_post_nms_topN,\n):\n    print(\n        \"Converting CollectAndDistributeFpnRpnProposals\"\n        \" Python -> C++:\\n{}\".format(op)\n    )\n    assert op.name.startswith(\n        \"CollectAndDistributeFpnRpnProposalsOp\"\n    ), \"Not valid CollectAndDistributeFpnRpnProposalsOp\"\n\n    inputs = [x for x in op.input]\n    ret = core.CreateOperator(\n        \"CollectAndDistributeFpnRpnProposals\",\n        inputs,\n        list(op.output),\n        roi_canonical_scale=roi_canonical_scale,\n        roi_canonical_level=roi_canonical_level,\n        roi_max_level=roi_max_level,\n        roi_min_level=roi_min_level,\n        rpn_max_level=rpn_max_level,\n        rpn_min_level=rpn_min_level,\n        rpn_post_nms_topN=rpn_post_nms_topN,\n    )\n    return ret\n\n\ndef convert_gen_proposals(\n    op, blobs, rpn_pre_nms_topN, rpn_post_nms_topN, rpn_nms_thresh, 
rpn_min_size\n):\n    print(\"Converting GenerateProposals Python -> C++:\\n{}\".format(op))\n    assert op.name.startswith(\"GenerateProposalsOp\"), \"Not valid GenerateProposalsOp\"\n\n    spatial_scale = mutils.get_op_arg_valf(op, \"spatial_scale\", None)\n    assert spatial_scale is not None\n\n    lvl = int(op.input[0][-1]) if op.input[0][-1].isdigit() else None\n\n    inputs = [x for x in op.input]\n    anchor_name = \"anchor{}\".format(lvl) if lvl else \"anchor\"\n    inputs.append(anchor_name)\n    anchor_sizes = (\n        (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.0 ** (lvl - cfg.FPN.RPN_MIN_LEVEL),)\n        if lvl\n        else cfg.RPN.SIZES\n    )\n    blobs[anchor_name] = get_anchors(spatial_scale, anchor_sizes)\n    print(\"anchors {}\".format(blobs[anchor_name]))\n\n    ret = core.CreateOperator(\n        \"GenerateProposals\",\n        inputs,\n        list(op.output),\n        spatial_scale=spatial_scale,\n        pre_nms_topN=rpn_pre_nms_topN,\n        post_nms_topN=rpn_post_nms_topN,\n        nms_thresh=rpn_nms_thresh,\n        min_size=rpn_min_size,\n        correct_transform_coords=True,\n    )\n    return ret, anchor_name\n\n\ndef get_anchors(spatial_scale, anchor_sizes):\n    anchors = generate_anchors.generate_anchors(\n        stride=1.0 / spatial_scale,\n        sizes=anchor_sizes,\n        aspect_ratios=cfg.RPN.ASPECT_RATIOS,\n    ).astype(np.float32)\n    return anchors\n\n\ndef reset_blob_names(blobs):\n    ret = {unscope_name(x): blobs[x] for x in blobs}\n    blobs.clear()\n    blobs.update(ret)\n\n\ndef convert_net(args, net, blobs):\n    @op_filter()\n    def convert_op_name(op):\n        if args.device != \"gpu\":\n            if op.engine != \"DEPTHWISE_3x3\":\n                op.engine = \"\"\n            op.device_option.CopyFrom(caffe2_pb2.DeviceOption())\n        reset_names(op.input)\n        reset_names(op.output)\n        return [op]\n\n    @op_filter(type=\"Python\")\n    def convert_python(op):\n        if 
op.name.startswith(\"GenerateProposalsOp\"):\n            gen_proposals_op, ext_input = convert_gen_proposals(\n                op,\n                blobs,\n                rpn_min_size=float(cfg.TEST.RPN_MIN_SIZE),\n                rpn_post_nms_topN=cfg.TEST.RPN_POST_NMS_TOP_N,\n                rpn_pre_nms_topN=cfg.TEST.RPN_PRE_NMS_TOP_N,\n                rpn_nms_thresh=cfg.TEST.RPN_NMS_THRESH,\n            )\n            net.external_input.extend([ext_input])\n            return [gen_proposals_op]\n        elif op.name.startswith(\"CollectAndDistributeFpnRpnProposalsOp\"):\n            collect_dist_op = convert_collect_and_distribute(\n                op,\n                blobs,\n                roi_canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,\n                roi_canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,\n                roi_max_level=cfg.FPN.ROI_MAX_LEVEL,\n                roi_min_level=cfg.FPN.ROI_MIN_LEVEL,\n                rpn_max_level=cfg.FPN.RPN_MAX_LEVEL,\n                rpn_min_level=cfg.FPN.RPN_MIN_LEVEL,\n                rpn_post_nms_topN=cfg.TEST.RPN_POST_NMS_TOP_N,\n            )\n            return [collect_dist_op]\n        else:\n            raise ValueError(\"Failed to convert Python op {}\".format(op.name))\n\n    # Only convert UpsampleNearest to ResizeNearest when converting to pb so that the existing models are unchanged\n    # https://github.com/facebookresearch/Detectron/pull/372#issuecomment-410248561\n    @op_filter(type=\"UpsampleNearest\")\n    def convert_upsample_nearest(op):\n        for arg in op.arg:\n            if arg.name == \"scale\":\n                scale = arg.i\n                break\n        else:\n            raise KeyError('No attribute \"scale\" in UpsampleNearest op')\n        resize_nearest_op = core.CreateOperator(\n            \"ResizeNearest\",\n            list(op.input),\n            list(op.output),\n            name=op.name,\n            width_scale=float(scale),\n            height_scale=float(scale),\n    
    )\n        return resize_nearest_op\n\n    @op_filter()\n    def convert_rpn_rois(op):\n        for j in range(len(op.input)):\n            if op.input[j] == \"rois\":\n                print(\n                    \"Converting op {} input name: rois -> rpn_rois:\\n{}\".format(\n                        op.type, op\n                    )\n                )\n                op.input[j] = \"rpn_rois\"\n        for j in range(len(op.output)):\n            if op.output[j] == \"rois\":\n                print(\n                    \"Converting op {} output name: rois -> rpn_rois:\\n{}\".format(\n                        op.type, op\n                    )\n                )\n                op.output[j] = \"rpn_rois\"\n        return [op]\n\n    @op_filter(type_in=[\"StopGradient\", \"Alias\"])\n    def convert_remove_op(op):\n        print(\"Removing op {}:\\n{}\".format(op.type, op))\n        return []\n\n    # We want to apply to all operators, including converted\n    # so run separately\n    convert_op_in_proto(net, convert_remove_op)\n    convert_op_in_proto(net, convert_upsample_nearest)\n    convert_op_in_proto(net, convert_python)\n    convert_op_in_proto(net, convert_op_name)\n    convert_op_in_proto(net, convert_rpn_rois)\n\n    reset_names(net.external_input)\n    reset_names(net.external_output)\n\n    reset_blob_names(blobs)\n\n\ndef add_bbox_ops(args, net, blobs):\n    new_ops = []\n    new_external_outputs = []\n\n    # Operators for bboxes\n    op_box = core.CreateOperator(\n        \"BBoxTransform\",\n        [\"rpn_rois\", \"bbox_pred\", \"im_info\"],\n        [\"pred_bbox\"],\n        weights=cfg.MODEL.BBOX_REG_WEIGHTS,\n        apply_scale=False,\n        correct_transform_coords=True,\n    )\n    new_ops.extend([op_box])\n\n    blob_prob = \"cls_prob\"\n    blob_box = \"pred_bbox\"\n    op_nms = core.CreateOperator(\n        \"BoxWithNMSLimit\",\n        [blob_prob, blob_box],\n        [\"score_nms\", \"bbox_nms\", \"class_nms\"],\n        arg=[\n    
        putils.MakeArgument(\"score_thresh\", cfg.TEST.SCORE_THRESH),\n            putils.MakeArgument(\"nms\", cfg.TEST.NMS),\n            putils.MakeArgument(\"detections_per_im\", cfg.TEST.DETECTIONS_PER_IM),\n            putils.MakeArgument(\"soft_nms_enabled\", cfg.TEST.SOFT_NMS.ENABLED),\n            putils.MakeArgument(\"soft_nms_method\", cfg.TEST.SOFT_NMS.METHOD),\n            putils.MakeArgument(\"soft_nms_sigma\", cfg.TEST.SOFT_NMS.SIGMA),\n        ],\n    )\n    new_ops.extend([op_nms])\n    new_external_outputs.extend([\"score_nms\", \"bbox_nms\", \"class_nms\"])\n\n    net.Proto().op.extend(new_ops)\n    net.Proto().external_output.extend(new_external_outputs)\n\n\ndef convert_model_gpu(args, net, init_net):\n    assert args.device == \"gpu\"\n\n    ret_net = copy.deepcopy(net)\n    ret_init_net = copy.deepcopy(init_net)\n\n    cdo_cuda = mutils.get_device_option_cuda()\n    cdo_cpu = mutils.get_device_option_cpu()\n\n    CPU_OPS = [\n        [\"CollectAndDistributeFpnRpnProposals\", None],\n        [\"GenerateProposals\", None],\n        [\"BBoxTransform\", None],\n        [\"BoxWithNMSLimit\", None],\n    ]\n    CPU_BLOBS = [\"im_info\", \"anchor\"]\n\n    @op_filter()\n    def convert_op_gpu(op):\n        for x in CPU_OPS:\n            if mutils.filter_op(op, type=x[0], inputs=x[1]):\n                return None\n        op.device_option.CopyFrom(cdo_cuda)\n        return [op]\n\n    @op_filter()\n    def convert_init_op_gpu(op):\n        if op.output[0] in CPU_BLOBS:\n            op.device_option.CopyFrom(cdo_cpu)\n        else:\n            op.device_option.CopyFrom(cdo_cuda)\n        return [op]\n\n    convert_op_in_proto(ret_init_net.Proto(), convert_init_op_gpu)\n    convert_op_in_proto(ret_net.Proto(), convert_op_gpu)\n\n    ret = core.InjectDeviceCopiesAmongNets([ret_init_net, ret_net])\n\n    return [ret[0][1], ret[0][0]]\n\n\ndef gen_init_net(net, blobs, empty_blobs):\n    blobs = copy.deepcopy(blobs)\n    for x in empty_blobs:\n        
blobs[x] = np.array([], dtype=np.float32)\n    init_net = mutils.gen_init_net_from_blobs(blobs, net.external_inputs)\n    init_net = core.Net(init_net)\n    return init_net\n\n\ndef _save_image_graphs(args, all_net, all_init_net):\n    print(\"Saving model graph...\")\n    mutils.save_graph(\n        all_net.Proto(), os.path.join(args.out_dir, \"model_def.png\"), op_only=False\n    )\n    print(\"Model def image saved to {}.\".format(args.out_dir))\n\n\ndef _save_models(all_net, all_init_net, args):\n    print(\"Writing converted model to {}...\".format(args.out_dir))\n    fname = \"model\"\n\n    if not os.path.exists(args.out_dir):\n        os.makedirs(args.out_dir)\n\n    with open(os.path.join(args.out_dir, fname + \".pb\"), \"wb\") as f:\n        f.write(all_net.Proto().SerializeToString())\n    with open(os.path.join(args.out_dir, fname + \".pbtxt\"), \"wb\") as f:\n        f.write(str(all_net.Proto()))\n    with open(os.path.join(args.out_dir, fname + \"_init.pb\"), \"wb\") as f:\n        f.write(all_init_net.Proto().SerializeToString())\n\n    _save_image_graphs(args, all_net, all_init_net)\n\n\ndef load_model(args):\n    model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS)\n    blobs = mutils.get_ws_blobs()\n\n    return model, blobs\n\n\ndef _get_result_blobs(check_blobs):\n    ret = {}\n    for x in check_blobs:\n        sn = core.ScopedName(x)\n        if workspace.HasBlob(sn):\n            ret[x] = workspace.FetchBlob(sn)\n        else:\n            ret[x] = None\n\n    return ret\n\n\ndef _sort_results(boxes, segms, keypoints, classes):\n    indices = np.argsort(boxes[:, -1])[::-1]\n    if boxes is not None:\n        boxes = boxes[indices, :]\n    if segms is not None:\n        segms = [segms[x] for x in indices]\n    if keypoints is not None:\n        keypoints = [keypoints[x] for x in indices]\n    if classes is not None:\n        if isinstance(classes, list):\n            classes = [classes[x] for x in indices]\n        else:\n           
 classes = classes[indices]\n\n    return boxes, segms, keypoints, classes\n\n\ndef run_model_cfg(args, im, check_blobs):\n    workspace.ResetWorkspace()\n    model, _ = load_model(args)\n    with c2_utils.NamedCudaScope(0):\n        cls_boxes, cls_segms, cls_keyps = test_engine.im_detect_all(\n            model, im, None, None\n        )\n\n    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(\n        cls_boxes, cls_segms, cls_keyps\n    )\n\n    # sort the results based on score for comparison\n    boxes, segms, keypoints, classes = _sort_results(boxes, segms, keypoints, classes)\n\n    # write final results back to workspace\n    def _ornone(res):\n        return np.array(res) if res is not None else np.array([], dtype=np.float32)\n\n    with c2_utils.NamedCudaScope(0):\n        workspace.FeedBlob(core.ScopedName(\"result_boxes\"), _ornone(boxes))\n        workspace.FeedBlob(core.ScopedName(\"result_segms\"), _ornone(segms))\n        workspace.FeedBlob(core.ScopedName(\"result_keypoints\"), _ornone(keypoints))\n        workspace.FeedBlob(core.ScopedName(\"result_classids\"), _ornone(classes))\n\n    # get result blobs\n    with c2_utils.NamedCudaScope(0):\n        ret = _get_result_blobs(check_blobs)\n\n    return ret\n\n\ndef _prepare_blobs(im, pixel_means, target_size, max_size):\n    \"\"\" Reference: blob.prep_im_for_blob() \"\"\"\n\n    im = im.astype(np.float32, copy=False)\n    im -= pixel_means\n    im_shape = im.shape\n\n    im_size_min = np.min(im_shape[0:2])\n    im_size_max = np.max(im_shape[0:2])\n    im_scale = float(target_size) / float(im_size_min)\n    if np.round(im_scale * im_size_max) > max_size:\n        im_scale = float(max_size) / float(im_size_max)\n    im = cv2.resize(\n        im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR\n    )\n\n    # Reuse code in blob_utils and fit FPN\n    blob = blob_utils.im_list_to_blob([im])\n\n    blobs = {}\n    blobs[\"data\"] = blob\n    blobs[\"im_info\"] = 
np.array(\n        [[blob.shape[2], blob.shape[3], im_scale]], dtype=np.float32\n    )\n    return blobs\n\n\ndef run_model_pb(args, net, init_net, im, check_blobs):\n    workspace.ResetWorkspace()\n    workspace.RunNetOnce(init_net)\n    mutils.create_input_blobs_for_net(net.Proto())\n    workspace.CreateNet(net)\n\n    # input_blobs, _ = core_test._get_blobs(im, None)\n    input_blobs = _prepare_blobs(im, cfg.PIXEL_MEANS, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)\n    gpu_blobs = []\n    if args.device == \"gpu\":\n        gpu_blobs = [\"data\"]\n    for k, v in input_blobs.items():\n        workspace.FeedBlob(\n            core.ScopedName(k),\n            v,\n            mutils.get_device_option_cuda()\n            if k in gpu_blobs\n            else mutils.get_device_option_cpu(),\n        )\n\n    try:\n        workspace.RunNet(net)\n        scores = workspace.FetchBlob(\"score_nms\")\n        classids = workspace.FetchBlob(\"class_nms\")\n        boxes = workspace.FetchBlob(\"bbox_nms\")\n    except Exception as e:\n        print(\"Running pb model failed.\\n{}\".format(e))\n        # may not detect anything at all\n        R = 0\n        scores = np.zeros((R,), dtype=np.float32)\n        boxes = np.zeros((R, 4), dtype=np.float32)\n        classids = np.zeros((R,), dtype=np.float32)\n\n    boxes = np.column_stack((boxes, scores))\n\n    # sort the results based on score for comparison\n    boxes, _, _, classids = _sort_results(boxes, None, None, classids)\n\n    # write final result back to workspace\n    workspace.FeedBlob(\"result_boxes\", boxes)\n    workspace.FeedBlob(\"result_classids\", classids)\n\n    ret = _get_result_blobs(check_blobs)\n\n    return ret\n\n\ndef verify_model(args, model_pb, test_img_file):\n    check_blobs = [\"result_boxes\", \"result_classids\"]  # result\n\n    print(\"Loading test file {}...\".format(test_img_file))\n    test_img = cv2.imread(test_img_file)\n    assert test_img is not None\n\n    def _run_cfg_func(im, blobs):\n        
return run_model_cfg(args, im, check_blobs)\n\n    def _run_pb_func(im, blobs):\n        return run_model_pb(args, model_pb[0], model_pb[1], im, check_blobs)\n\n    print(\"Checking models...\")\n    assert mutils.compare_model(_run_cfg_func, _run_pb_func, test_img, check_blobs)\n\n\ndef _export_to_logfiledb(args, net, init_net, inputs, out_file, extra_out_tensors=None):\n    out_tensors = list(net.Proto().external_output)\n    if extra_out_tensors is not None:\n        out_tensors += extra_out_tensors\n    params = list(set(net.Proto().external_input) - set(inputs))\n    net_type = None\n    predictor_export_meta = predictor_exporter.PredictorExportMeta(\n        predict_net=net,\n        parameters=params,\n        inputs=inputs,\n        outputs=out_tensors,\n        net_type=net_type,\n    )\n\n    logger.info(\"Exporting Caffe2 model to {}\".format(out_file))\n    predictor_exporter.save_to_db(\n        db_type=\"log_file_db\",\n        db_destination=out_file,\n        predictor_export_meta=predictor_export_meta,\n    )\n\n\ndef main():\n    workspace.GlobalInit([\"caffe2\", \"--caffe2_log_level=0\"])\n    args = parse_args()\n    logger.info(\"Called with args:\")\n    logger.info(args)\n    if args.cfg_file is not None:\n        merge_cfg_from_file(args.cfg_file)\n    if args.opts is not None:\n        merge_cfg_from_list(args.opts)\n    cfg.NUM_GPUS = 1\n    assert_and_infer_cfg()\n    logger.info(\"Converting model with config:\")\n    logger.info(pprint.pformat(cfg))\n\n    # script will stop when it can't find an operator rather\n    # than stopping based on these flags\n    #\n    # assert not cfg.MODEL.KEYPOINTS_ON, \"Keypoint model not supported.\"\n    # assert not cfg.MODEL.MASK_ON, \"Mask model not supported.\"\n    # assert not cfg.FPN.FPN_ON, \"FPN not supported.\"\n    # assert not cfg.RETINANET.RETINANET_ON, \"RetinaNet model not supported.\"\n\n    # load model from cfg\n    model, blobs = load_model(args)\n\n    net = core.Net(\"\")\n    
net.Proto().op.extend(copy.deepcopy(model.net.Proto().op))\n    net.Proto().external_input.extend(copy.deepcopy(model.net.Proto().external_input))\n    net.Proto().external_output.extend(copy.deepcopy(model.net.Proto().external_output))\n    net.Proto().type = args.net_execution_type\n    net.Proto().num_workers = 1 if args.net_execution_type == \"simple\" else 4\n\n    # Reset the device_option, change to unscope name and replace python operators\n    convert_net(args, net.Proto(), blobs)\n\n    # add operators for bbox\n    add_bbox_ops(args, net, blobs)\n\n    if args.fuse_af:\n        print(\"Fusing affine channel...\")\n        net, blobs = mutils.fuse_net_affine(net, blobs)\n\n    if args.use_nnpack:\n        mutils.update_mobile_engines(net.Proto())\n\n    # generate init net\n    empty_blobs = [\"data\", \"im_info\"]\n    init_net = gen_init_net(net, blobs, empty_blobs)\n\n    if args.device == \"gpu\":\n        [net, init_net] = convert_model_gpu(args, net, init_net)\n\n    net.Proto().name = args.net_name\n    init_net.Proto().name = args.net_name + \"_init\"\n\n    if args.test_img is not None:\n        verify_model(args, [net, init_net], args.test_img)\n\n    if args.logdb == 1:\n        output_file = os.path.join(args.out_dir, \"model.logfiledb\")\n        _export_to_logfiledb(args, net, init_net, empty_blobs, output_file)\n    else:\n        _save_models(net, init_net, args)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/convert_selective_search.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Script to convert Selective Search proposal boxes into the Detectron proposal\nfile format.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\nimport scipy.io as sio\nimport sys\n\nfrom detectron.datasets.json_dataset import JsonDataset\nfrom detectron.utils.io import save_object\n\n\nif __name__ == '__main__':\n    dataset_name = sys.argv[1]\n    file_in = sys.argv[2]\n    file_out = sys.argv[3]\n\n    ds = JsonDataset(dataset_name)\n    roidb = ds.get_roidb()\n    raw_data = sio.loadmat(file_in)['boxes'].ravel()\n    assert raw_data.shape[0] == len(roidb)\n\n    boxes = []\n    scores = []\n    ids = []\n    for i in range(raw_data.shape[0]):\n        if i % 1000 == 0:\n            print('{}/{}'.format(i + 1, len(roidb)))\n        # selective search boxes are 1-indexed and (y1, x1, y2, x2)\n        i_boxes = raw_data[i][:, (1, 0, 3, 2)] - 1\n        boxes.append(i_boxes.astype(np.float32))\n        scores.append(np.zeros((i_boxes.shape[0]), dtype=np.float32))\n        ids.append(roidb[i]['id'])\n\n    save_object(dict(boxes=boxes, scores=scores, indexes=ids), file_out)\n"
  },
  {
    "path": "tools/generate_testdev_from_test.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Given a full set of results (boxes, masks, or keypoints) on the 2017 COCO\ntest set, this script extracts the results subset that corresponds to 2017\ntest-dev. The test-dev subset can then be submitted to the COCO evaluation\nserver.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport json\nimport os\nimport sys\n\nfrom detectron.datasets.dataset_catalog import get_ann_fn\nfrom detectron.utils.timer import Timer\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        '--json', dest='json_file',\n        help='detections json file',\n        default='', type=str)\n    parser.add_argument(\n        '--output-dir', dest='output_dir',\n        help='output directory',\n        default='/tmp', type=str)\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    args = parser.parse_args()\n    return args\n\n\ndef convert(json_file, output_dir):\n    print('Reading: {}'.format(json_file))\n    with open(json_file, 'r') as fid:\n        dt = json.load(fid)\n    print('done!')\n\n    test_image_info = get_ann_fn('coco_2017_test')\n    with 
open(test_image_info, 'r') as fid:\n        info_test = json.load(fid)\n    image_test = info_test['images']\n    image_test_id = [i['id'] for i in image_test]\n    print('{} has {} images'.format(test_image_info, len(image_test_id)))\n\n    test_dev_image_info = get_ann_fn('coco_2017_test-dev')\n    with open(test_dev_image_info, 'r') as fid:\n        info_testdev = json.load(fid)\n    image_testdev = info_testdev['images']\n    image_testdev_id = [i['id'] for i in image_testdev]\n    print('{} has {} images'.format(test_dev_image_info, len(image_testdev_id)))\n\n    dt_testdev = []\n    print('Filtering test-dev from test...')\n    t = Timer()\n    t.tic()\n    for i in range(len(dt)):\n        if i % 1000 == 0:\n            print('{}/{}'.format(i, len(dt)))\n        if dt[i]['image_id'] in image_testdev_id:\n            dt_testdev.append(dt[i])\n    print('Done filtering ({:2}s)!'.format(t.toc()))\n\n    filename, file_extension = os.path.splitext(os.path.basename(json_file))\n    filename = filename + '_test-dev'\n    filename = os.path.join(output_dir, filename + file_extension)\n    with open(filename, 'w') as fid:\n        info_test = json.dump(dt_testdev, fid)\n    print('Done writing: {}!'.format(filename))\n\n\nif __name__ == '__main__':\n    opts = parse_args()\n    convert(opts.json_file, opts.output_dir)\n"
  },
  {
    "path": "tools/infer.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Perform inference on a single image or all images with a certain extension\n(e.g., .jpg) in a folder. Allows for using a combination of multiple models.\nFor example, one model may be used for RPN, another model for Fast R-CNN style\nbox detection, yet another model to predict masks, and yet another model to\npredict keypoints.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)\nimport logging\nimport os\nimport sys\n\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import assert_and_infer_cfg\nfrom detectron.core.config import cfg\nfrom detectron.core.config import load_cfg\nfrom detectron.core.config import merge_cfg_from_cfg\nfrom detectron.core.config import merge_cfg_from_file\nfrom detectron.utils.io import cache_url\nfrom detectron.utils.logging import setup_logging\nimport detectron.core.rpn_generator as rpn_engine\nimport detectron.core.test_engine as model_engine\nimport detectron.datasets.dummy_datasets as dummy_datasets\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.env as envu\nimport 
detectron.utils.vis as vis_utils\n\nc2_utils.import_detectron_ops()\n\n# OpenCL may be enabled by default in OpenCV3; disable it because it's not\n# thread safe and causes unwanted GPU memory allocations.\ncv2.ocl.setUseOpenCL(False)\n\n# infer.py\n#   --im [path/to/image.jpg] \\\n#   --rpn-pkl [path/to/rpn/model.pkl] \\\n#   --rpn-cfg [path/to/rpn/config.yaml] \\\n#   --output-dir [path/to/output/dir] \\\n#   [model1] [config1] [model2] [config2] ...\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Inference on an image')\n    parser.add_argument(\n        '--im', dest='im_file', help='input image', default=None, type=str\n    )\n    parser.add_argument(\n        '--rpn-pkl',\n        dest='rpn_pkl',\n        help='rpn model file (pkl)',\n        default=None,\n        type=str\n    )\n    parser.add_argument(\n        '--rpn-cfg',\n        dest='rpn_cfg',\n        help='cfg model file (yaml)',\n        default=None,\n        type=str\n    )\n    parser.add_argument(\n        '--output-dir',\n        dest='output_dir',\n        help='directory for visualization pdfs (default: /tmp/infer)',\n        default='/tmp/infer',\n        type=str\n    )\n    parser.add_argument(\n        'models_to_run',\n        help='pairs of models & configs, listed like so: [pkl1] [yaml1] [pkl2] [yaml2] ...',\n        default=None,\n        nargs=argparse.REMAINDER\n    )\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    return parser.parse_args()\n\n\ndef get_rpn_box_proposals(im, args):\n    cfg.immutable(False)\n    merge_cfg_from_file(args.rpn_cfg)\n    cfg.NUM_GPUS = 1\n    cfg.MODEL.RPN_ONLY = True\n    cfg.TEST.RPN_PRE_NMS_TOP_N = 10000\n    cfg.TEST.RPN_POST_NMS_TOP_N = 2000\n    assert_and_infer_cfg(cache_urls=False)\n\n    model = model_engine.initialize_model_from_cfg(args.rpn_pkl)\n    with c2_utils.NamedCudaScope(0):\n        boxes, scores = rpn_engine.im_proposals(model, im)\n    return boxes, scores\n\n\ndef 
main(args):\n    logger = logging.getLogger(__name__)\n    dummy_coco_dataset = dummy_datasets.get_coco_dataset()\n    cfg_orig = load_cfg(envu.yaml_dump(cfg))\n    im = cv2.imread(args.im_file)\n\n    if args.rpn_pkl is not None:\n        proposal_boxes, _proposal_scores = get_rpn_box_proposals(im, args)\n        workspace.ResetWorkspace()\n    else:\n        proposal_boxes = None\n\n    cls_boxes, cls_segms, cls_keyps = None, None, None\n    for i in range(0, len(args.models_to_run), 2):\n        pkl = args.models_to_run[i]\n        yml = args.models_to_run[i + 1]\n        cfg.immutable(False)\n        merge_cfg_from_cfg(cfg_orig)\n        merge_cfg_from_file(yml)\n        if len(pkl) > 0:\n            weights_file = pkl\n        else:\n            weights_file = cfg.TEST.WEIGHTS\n        cfg.NUM_GPUS = 1\n        assert_and_infer_cfg(cache_urls=False)\n        model = model_engine.initialize_model_from_cfg(weights_file)\n        with c2_utils.NamedCudaScope(0):\n            cls_boxes_, cls_segms_, cls_keyps_ = \\\n                model_engine.im_detect_all(model, im, proposal_boxes)\n        cls_boxes = cls_boxes_ if cls_boxes_ is not None else cls_boxes\n        cls_segms = cls_segms_ if cls_segms_ is not None else cls_segms\n        cls_keyps = cls_keyps_ if cls_keyps_ is not None else cls_keyps\n        workspace.ResetWorkspace()\n\n    out_name = os.path.join(\n        args.output_dir, '{}'.format(os.path.basename(args.im_file) + '.pdf')\n    )\n    logger.info('Processing {} -> {}'.format(args.im_file, out_name))\n\n    vis_utils.vis_one_image(\n        im[:, :, ::-1],\n        args.im_file,\n        args.output_dir,\n        cls_boxes,\n        cls_segms,\n        cls_keyps,\n        dataset=dummy_coco_dataset,\n        box_alpha=0.3,\n        show_class=True,\n        thresh=0.7,\n        kp_thresh=2\n    )\n\n\ndef check_args(args):\n    assert (\n        (args.rpn_pkl is not None and args.rpn_cfg is not None) or\n        (args.rpn_pkl is None and 
args.rpn_cfg is None)\n    )\n    if args.rpn_pkl is not None:\n        args.rpn_pkl = cache_url(args.rpn_pkl, cfg.DOWNLOAD_CACHE)\n        assert os.path.exists(args.rpn_pkl)\n        assert os.path.exists(args.rpn_cfg)\n    if args.models_to_run is not None:\n        assert len(args.models_to_run) % 2 == 0\n        for i, model_file in enumerate(args.models_to_run):\n            if len(model_file) > 0:\n                if i % 2 == 0:\n                    model_file = cache_url(model_file, cfg.DOWNLOAD_CACHE)\n                    args.models_to_run[i] = model_file\n                assert os.path.exists(model_file), \\\n                    '\\'{}\\' does not exist'.format(model_file)\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])\n    setup_logging(__name__)\n    args = parse_args()\n    check_args(args)\n    main(args)\n"
  },
  {
    "path": "tools/infer_simple.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Perform inference on a single image or all images with a certain extension\n(e.g., .jpg) in a folder.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom collections import defaultdict\nimport argparse\nimport cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)\nimport glob\nimport logging\nimport os\nimport sys\nimport time\n\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import assert_and_infer_cfg\nfrom detectron.core.config import cfg\nfrom detectron.core.config import merge_cfg_from_file\nfrom detectron.utils.io import cache_url\nfrom detectron.utils.logging import setup_logging\nfrom detectron.utils.timer import Timer\nimport detectron.core.test_engine as infer_engine\nimport detectron.datasets.dummy_datasets as dummy_datasets\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.vis as vis_utils\n\nc2_utils.import_detectron_ops()\n\n# OpenCL may be enabled by default in OpenCV3; disable it because it's not\n# thread safe and causes unwanted GPU memory allocations.\ncv2.ocl.setUseOpenCL(False)\n\n\ndef parse_args():\n    parser = 
argparse.ArgumentParser(description='End-to-end inference')\n    parser.add_argument(\n        '--cfg',\n        dest='cfg',\n        help='cfg model file (/path/to/model_config.yaml)',\n        default=None,\n        type=str\n    )\n    parser.add_argument(\n        '--wts',\n        dest='weights',\n        help='weights model file (/path/to/model_weights.pkl)',\n        default=None,\n        type=str\n    )\n    parser.add_argument(\n        '--output-dir',\n        dest='output_dir',\n        help='directory for visualization pdfs (default: /tmp/infer_simple)',\n        default='/tmp/infer_simple',\n        type=str\n    )\n    parser.add_argument(\n        '--image-ext',\n        dest='image_ext',\n        help='image file name extension (default: jpg)',\n        default='jpg',\n        type=str\n    )\n    parser.add_argument(\n        '--always-out',\n        dest='out_when_no_box',\n        help='output image even when no object is found',\n        action='store_true'\n    )\n    parser.add_argument(\n        '--output-ext',\n        dest='output_ext',\n        help='output image file format (default: pdf)',\n        default='pdf',\n        type=str\n    )\n    parser.add_argument(\n        '--thresh',\n        dest='thresh',\n        help='Threshold for visualizing detections',\n        default=0.7,\n        type=float\n    )\n    parser.add_argument(\n        '--kp-thresh',\n        dest='kp_thresh',\n        help='Threshold for visualizing keypoints',\n        default=2.0,\n        type=float\n    )\n    parser.add_argument(\n        'im_or_folder', help='image or folder of images', default=None\n    )\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    return parser.parse_args()\n\n\ndef main(args):\n    logger = logging.getLogger(__name__)\n\n    merge_cfg_from_file(args.cfg)\n    cfg.NUM_GPUS = 1\n    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)\n    assert_and_infer_cfg(cache_urls=False)\n\n    assert 
not cfg.MODEL.RPN_ONLY, \\\n        'RPN models are not supported'\n    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \\\n        'Models that require precomputed proposals are not supported'\n\n    model = infer_engine.initialize_model_from_cfg(args.weights)\n    dummy_coco_dataset = dummy_datasets.get_coco_dataset()\n\n    if os.path.isdir(args.im_or_folder):\n        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)\n    else:\n        im_list = [args.im_or_folder]\n\n    for i, im_name in enumerate(im_list):\n        out_name = os.path.join(\n            args.output_dir, '{}'.format(os.path.basename(im_name) + '.' + args.output_ext)\n        )\n        logger.info('Processing {} -> {}'.format(im_name, out_name))\n        im = cv2.imread(im_name)\n        timers = defaultdict(Timer)\n        t = time.time()\n        with c2_utils.NamedCudaScope(0):\n            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(\n                model, im, None, timers=timers\n            )\n        logger.info('Inference time: {:.3f}s'.format(time.time() - t))\n        for k, v in timers.items():\n            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))\n        if i == 0:\n            logger.info(\n                ' \\ Note: inference on the first image will be slower than the '\n                'rest (caches and auto-tuning need to warm up)'\n            )\n\n        vis_utils.vis_one_image(\n            im[:, :, ::-1],  # BGR -> RGB for visualization\n            im_name,\n            args.output_dir,\n            cls_boxes,\n            cls_segms,\n            cls_keyps,\n            dataset=dummy_coco_dataset,\n            box_alpha=0.3,\n            show_class=True,\n            thresh=args.thresh,\n            kp_thresh=args.kp_thresh,\n            ext=args.output_ext,\n            out_when_no_box=args.out_when_no_box\n        )\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])\n    
setup_logging(__name__)\n    args = parse_args()\n    main(args)\n"
  },
  {
    "path": "tools/pickle_caffe_blobs.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Script for converting Caffe (<= 1.0) models into the simple state dict\nformat used by Detectron. For example, this script can convert the original\nResNet models released by MSRA.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport numpy as np\nimport os\nimport sys\n\nfrom caffe.proto import caffe_pb2\nfrom caffe2.proto import caffe2_pb2\nfrom caffe2.python import caffe_translator\nfrom caffe2.python import utils\nfrom google.protobuf import text_format\n\nfrom detectron.utils.io import save_object\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Dump weights from a Caffe model'\n    )\n    parser.add_argument(\n        '--prototxt',\n        dest='prototxt_file_name',\n        help='Network definition prototxt file path',\n        default=None,\n        type=str\n    )\n    parser.add_argument(\n        '--caffemodel',\n        dest='caffemodel_file_name',\n        help='Pretrained network weights file path',\n        default=None,\n        type=str\n    )\n    parser.add_argument(\n        '--output',\n        dest='out_file_name',\n        help='Output file path',\n  
      default=None,\n        type=str\n    )\n\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n\n    args = parser.parse_args()\n    return args\n\n\ndef normalize_resnet_name(name):\n    if name.find('res') == 0 and name.find('res_') == -1:\n        # E.g.,\n        #  res4b11_branch2c -> res4_11_branch2c\n        #  res2a_branch1 -> res2_0_branch1\n        chunk = name[len('res'):name.find('_')]\n        name = (\n            'res' + chunk[0] + '_' + str(\n                int(chunk[2:]) if len(chunk) > 2  # e.g., \"b1\" -> 1\n                else ord(chunk[1]) - ord('a')\n            ) +  # e.g., \"a\" -> 0\n            name[name.find('_'):]\n        )\n    return name\n\n\ndef pickle_weights(out_file_name, weights):\n    blobs = {\n        normalize_resnet_name(blob.name): utils.Caffe2TensorToNumpyArray(blob)\n        for blob in weights.protos\n    }\n    save_object(blobs, out_file_name)\n    print('Wrote blobs:')\n    print(sorted(blobs.keys()))\n\n\ndef add_missing_biases(caffenet_weights):\n    for layer in caffenet_weights.layer:\n        if layer.type == 'Convolution' and len(layer.blobs) == 1:\n            num_filters = layer.blobs[0].shape.dim[0]\n            bias_blob = caffe_pb2.BlobProto()\n            bias_blob.data.extend(np.zeros(num_filters))\n            bias_blob.num, bias_blob.channels, bias_blob.height = 1, 1, 1\n            bias_blob.width = num_filters\n            layer.blobs.extend([bias_blob])\n\n\ndef remove_spatial_bn_layers(caffenet, caffenet_weights):\n    # Layer types associated with spatial batch norm\n    remove_types = ['BatchNorm', 'Scale']\n\n    def _remove_layers(net):\n        for i in reversed(range(len(net.layer))):\n            if net.layer[i].type in remove_types:\n                net.layer.pop(i)\n\n    # First remove layers from caffenet proto\n    _remove_layers(caffenet)\n    # We'll return these so we can save the batch norm parameters\n    bn_layers = [\n        layer for layer in 
caffenet_weights.layer if layer.type in remove_types\n    ]\n    _remove_layers(caffenet_weights)\n\n    def _create_tensor(arr, shape, name):\n        t = caffe2_pb2.TensorProto()\n        t.name = name\n        t.data_type = caffe2_pb2.TensorProto.FLOAT\n        t.dims.extend(shape.dim)\n        t.float_data.extend(arr)\n        assert len(t.float_data) == np.prod(t.dims), 'Data size, shape mismatch'\n        return t\n\n    bn_tensors = []\n    for (bn, scl) in zip(bn_layers[0::2], bn_layers[1::2]):\n        assert bn.name[len('bn'):] == scl.name[len('scale'):], 'Pair mismatch'\n        blob_out = 'res' + bn.name[len('bn'):] + '_bn'\n        bn_mean = np.asarray(bn.blobs[0].data)\n        bn_var = np.asarray(bn.blobs[1].data)\n        scale = np.asarray(scl.blobs[0].data)\n        bias = np.asarray(scl.blobs[1].data)\n        std = np.sqrt(bn_var + 1e-5)\n        new_scale = scale / std\n        new_bias = bias - bn_mean * scale / std\n        new_scale_tensor = _create_tensor(\n            new_scale, bn.blobs[0].shape, blob_out + '_s'\n        )\n        new_bias_tensor = _create_tensor(\n            new_bias, bn.blobs[0].shape, blob_out + '_b'\n        )\n        bn_tensors.extend([new_scale_tensor, new_bias_tensor])\n    return bn_tensors\n\n\ndef remove_layers_without_parameters(caffenet, caffenet_weights):\n    for i in reversed(range(len(caffenet_weights.layer))):\n        if len(caffenet_weights.layer[i].blobs) == 0:\n            # Search for the corresponding layer in caffenet and remove it\n            name = caffenet_weights.layer[i].name\n            found = False\n            for j in range(len(caffenet.layer)):\n                if caffenet.layer[j].name == name:\n                    caffenet.layer.pop(j)\n                    found = True\n                    break\n            if not found and name[-len('_split'):] != '_split':\n                print('Warning: layer {} not found in caffenet'.format(name))\n            
caffenet_weights.layer.pop(i)\n\n\ndef normalize_shape(caffenet_weights):\n    for layer in caffenet_weights.layer:\n        for blob in layer.blobs:\n            shape = (blob.num, blob.channels, blob.height, blob.width)\n            if len(blob.data) != np.prod(shape):\n                shape = tuple(blob.shape.dim)\n                if len(shape) == 1:\n                    # Handle biases\n                    shape = (1, 1, 1, shape[0])\n                if len(shape) == 2:\n                    # Handle InnerProduct layers\n                    shape = (1, 1, shape[0], shape[1])\n                assert len(shape) == 4\n                blob.num, blob.channels, blob.height, blob.width = shape\n\n\ndef load_and_convert_caffe_model(prototxt_file_name, caffemodel_file_name):\n    caffenet = caffe_pb2.NetParameter()\n    caffenet_weights = caffe_pb2.NetParameter()\n    text_format.Merge(open(prototxt_file_name).read(), caffenet)\n    caffenet_weights.ParseFromString(open(caffemodel_file_name).read())\n    # C2 conv layers currently require biases, but they are optional in C1\n    # Add zeros as biases if they are missing\n    add_missing_biases(caffenet_weights)\n    # We only care about getting parameters, so remove layers w/o parameters\n    remove_layers_without_parameters(caffenet, caffenet_weights)\n    # BatchNorm is not implemented in the translator *and* we need to fold Scale\n    # layers into the new C2 SpatialBN op, hence we remove the batch norm layers\n    # and apply custom translation code\n    bn_weights = remove_spatial_bn_layers(caffenet, caffenet_weights)\n    # Set num, channel, height and width for blobs that use shape.dim instead\n    normalize_shape(caffenet_weights)\n    # Translate the rest of the model\n    net, pretrained_weights = caffe_translator.TranslateModel(\n        caffenet, caffenet_weights\n    )\n    pretrained_weights.protos.extend(bn_weights)\n    return net, pretrained_weights\n\n\nif __name__ == '__main__':\n    args = 
parse_args()\n    assert os.path.exists(args.prototxt_file_name), \\\n        'Prototxt file does not exist'\n    assert os.path.exists(args.caffemodel_file_name), \\\n        'Weights file does not exist'\n    net, weights = load_and_convert_caffe_model(\n        args.prototxt_file_name, args.caffemodel_file_name\n    )\n    pickle_weights(args.out_file_name, weights)\n"
  },
  {
    "path": "tools/reval.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n#\n# Based on:\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\n\"\"\"Reval = re-eval. 
Re-evaluate saved detections.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport os\nimport sys\n\nfrom detectron.core.config import cfg\nfrom detectron.datasets import task_evaluation\nfrom detectron.datasets.json_dataset import JsonDataset\nfrom detectron.utils.io import load_object\nfrom detectron.utils.logging import setup_logging\nimport detectron.core.config as core_config\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Re-evaluate results')\n    parser.add_argument(\n        'output_dir', nargs=1, help='results directory', type=str\n    )\n    parser.add_argument(\n        '--dataset',\n        dest='dataset_name',\n        help='dataset to re-evaluate',\n        default='voc_2007_test',\n        type=str\n    )\n    parser.add_argument(\n        '--matlab',\n        dest='matlab_eval',\n        help='use matlab for evaluation',\n        action='store_true'\n    )\n    parser.add_argument(\n        '--comp',\n        dest='comp_mode',\n        help='competition mode',\n        action='store_true'\n    )\n    parser.add_argument(\n        '--cfg',\n        dest='cfg_file',\n        help='optional config file',\n        default=None,\n        type=str\n    )\n\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n\n    args = parser.parse_args()\n    return args\n\n\ndef do_reval(dataset_name, output_dir, args):\n    dataset = JsonDataset(dataset_name)\n    dets = load_object(os.path.join(output_dir, 'detections.pkl'))\n\n    # Override config with the one saved in the detections file\n    if args.cfg_file is not None:\n        core_config.merge_cfg_from_cfg(core_config.load_cfg(dets['cfg']))\n    else:\n        core_config._merge_a_into_b(core_config.load_cfg(dets['cfg']), cfg)\n    results = task_evaluation.evaluate_all(\n        dataset,\n        dets['all_boxes'],\n       
 dets['all_segms'],\n        dets['all_keyps'],\n        output_dir,\n        use_matlab=args.matlab_eval\n    )\n    task_evaluation.log_copy_paste_friendly_results(results)\n\n\nif __name__ == '__main__':\n    setup_logging(__name__)\n    args = parse_args()\n    if args.comp_mode:\n        cfg.TEST.COMPETITION_MODE = True\n    output_dir = os.path.abspath(args.output_dir[0])\n    do_reval(args.dataset_name, output_dir, args)\n"
  },
  {
    "path": "tools/test_net.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Perform inference on one or more datasets.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)\nimport os\nimport pprint\nimport sys\nimport time\n\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import assert_and_infer_cfg\nfrom detectron.core.config import cfg\nfrom detectron.core.config import merge_cfg_from_file\nfrom detectron.core.config import merge_cfg_from_list\nfrom detectron.core.test_engine import run_inference\nfrom detectron.utils.logging import setup_logging\nimport detectron.utils.c2 as c2_utils\n\nc2_utils.import_detectron_ops()\n\n# OpenCL may be enabled by default in OpenCV3; disable it because it's not\n# thread safe and causes unwanted GPU memory allocations.\ncv2.ocl.setUseOpenCL(False)\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')\n    parser.add_argument(\n        '--cfg',\n        dest='cfg_file',\n        help='optional config file',\n        default=None,\n        type=str\n    )\n    parser.add_argument(\n        '--wait',\n        
dest='wait',\n        help='wait until net file exists',\n        default=True,\n        type=bool\n    )\n    parser.add_argument(\n        '--vis', dest='vis', help='visualize detections', action='store_true'\n    )\n    parser.add_argument(\n        '--multi-gpu-testing',\n        dest='multi_gpu_testing',\n        help='using cfg.NUM_GPUS for inference',\n        action='store_true'\n    )\n    parser.add_argument(\n        '--range',\n        dest='range',\n        help='start (inclusive) and end (exclusive) indices',\n        default=None,\n        type=int,\n        nargs=2\n    )\n    parser.add_argument(\n        'opts',\n        help='See detectron/core/config.py for all options',\n        default=None,\n        nargs=argparse.REMAINDER\n    )\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    return parser.parse_args()\n\n\nif __name__ == '__main__':\n    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])\n    logger = setup_logging(__name__)\n    args = parse_args()\n    logger.info('Called with args:')\n    logger.info(args)\n    if args.cfg_file is not None:\n        merge_cfg_from_file(args.cfg_file)\n    if args.opts is not None:\n        merge_cfg_from_list(args.opts)\n    assert_and_infer_cfg()\n    logger.info('Testing with config:')\n    logger.info(pprint.pformat(cfg))\n\n    while not os.path.exists(cfg.TEST.WEIGHTS) and args.wait:\n        logger.info('Waiting for \\'{}\\' to exist...'.format(cfg.TEST.WEIGHTS))\n        time.sleep(10)\n\n    run_inference(\n        cfg.TEST.WEIGHTS,\n        ind_range=args.range,\n        multi_gpu_testing=args.multi_gpu_testing,\n        check_expected_results=True,\n    )\n"
  },
  {
    "path": "tools/train_net.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Train a network with Detectron.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport cv2  # NOQA (Must import before importing caffe2 due to bug in cv2)\nimport logging\nimport numpy as np\nimport pprint\nimport sys\n\nfrom caffe2.python import workspace\n\nfrom detectron.core.config import assert_and_infer_cfg\nfrom detectron.core.config import cfg\nfrom detectron.core.config import merge_cfg_from_file\nfrom detectron.core.config import merge_cfg_from_list\nfrom detectron.core.test_engine import run_inference\nfrom detectron.utils.logging import setup_logging\nimport detectron.utils.c2 as c2_utils\nimport detectron.utils.train\n\nc2_utils.import_contrib_ops()\nc2_utils.import_detectron_ops()\n\n# OpenCL may be enabled by default in OpenCV3; disable it because it's not\n# thread safe and causes unwanted GPU memory allocations.\ncv2.ocl.setUseOpenCL(False)\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Train a network with Detectron'\n    )\n    parser.add_argument(\n        '--cfg',\n        dest='cfg_file',\n        help='Config file for training (and optionally 
testing)',\n        default=None,\n        type=str\n    )\n    parser.add_argument(\n        '--multi-gpu-testing',\n        dest='multi_gpu_testing',\n        help='Use cfg.NUM_GPUS GPUs for inference',\n        action='store_true'\n    )\n    parser.add_argument(\n        '--skip-test',\n        dest='skip_test',\n        help='Do not test the final model',\n        action='store_true'\n    )\n    parser.add_argument(\n        'opts',\n        help='See detectron/core/config.py for all options',\n        default=None,\n        nargs=argparse.REMAINDER\n    )\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    return parser.parse_args()\n\n\ndef main():\n    # Initialize C2\n    workspace.GlobalInit(\n        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1']\n    )\n    # Set up logging and load config options\n    logger = setup_logging(__name__)\n    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)\n    args = parse_args()\n    logger.info('Called with args:')\n    logger.info(args)\n    if args.cfg_file is not None:\n        merge_cfg_from_file(args.cfg_file)\n    if args.opts is not None:\n        merge_cfg_from_list(args.opts)\n    assert_and_infer_cfg()\n    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()\n    logger.info(\"cuda version : {}\".format(cuda_ver))\n    logger.info(\"cudnn version: {}\".format(cudnn_ver))\n    logger.info(\"nvidia-smi output:\\n{}\".format(smi_output))\n    logger.info('Training with config:')\n    logger.info(pprint.pformat(cfg))\n    # Note that while we set the numpy random seed network training will not be\n    # deterministic in general. 
There are sources of non-determinism that cannot\n    # be removed with a reasonable execution-speed tradeoff (such as certain\n    # non-deterministic cudnn functions).\n    np.random.seed(cfg.RNG_SEED)\n    # Execute the training run\n    checkpoints = detectron.utils.train.train_model()\n    # Test the trained model\n    if not args.skip_test:\n        test_model(checkpoints['final'], args.multi_gpu_testing, args.opts)\n\n\ndef test_model(model_file, multi_gpu_testing, opts=None):\n    \"\"\"Test a model.\"\"\"\n    # Clear memory before inference\n    workspace.ResetWorkspace()\n    # Run inference\n    run_inference(\n        model_file, multi_gpu_testing=multi_gpu_testing,\n        check_expected_results=True,\n    )\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/visualize_results.py",
    "content": "#!/usr/bin/env python\n\n# Copyright (c) 2017-present, Facebook, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n##############################################################################\n\n\"\"\"Script for visualizing results saved in a detections.pkl file.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport argparse\nimport cv2\nimport os\nimport sys\n\nfrom detectron.datasets.json_dataset import JsonDataset\nfrom detectron.utils.io import load_object\nimport detectron.utils.vis as vis_utils\n\n# OpenCL may be enabled by default in OpenCV3; disable it because it's not\n# thread safe and causes unwanted GPU memory allocations.\ncv2.ocl.setUseOpenCL(False)\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        '--dataset',\n        dest='dataset',\n        help='dataset',\n        default='coco_2014_minival',\n        type=str\n    )\n    parser.add_argument(\n        '--detections',\n        dest='detections',\n        help='detections pkl file',\n        default='',\n        type=str\n    )\n    parser.add_argument(\n        '--thresh',\n        dest='thresh',\n        help='detection prob threshold',\n        default=0.9,\n        type=float\n    )\n    parser.add_argument(\n        '--output-dir',\n        dest='output_dir',\n        help='output directory',\n        
default='./tmp/vis-output',\n        type=str\n    )\n    parser.add_argument(\n        '--first',\n        dest='first',\n        help='only visualize the first k images',\n        default=0,\n        type=int\n    )\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n    args = parser.parse_args()\n    return args\n\n\ndef vis(dataset, detections_pkl, thresh, output_dir, limit=0):\n    ds = JsonDataset(dataset)\n    roidb = ds.get_roidb()\n\n    dets = load_object(detections_pkl)\n\n    assert all(k in dets for k in ['all_boxes', 'all_segms', 'all_keyps']), \\\n        'Expected detections pkl file in the format used by test_engine.py'\n\n    all_boxes = dets['all_boxes']\n    all_segms = dets['all_segms']\n    all_keyps = dets['all_keyps']\n\n    def id_or_index(ix, val):\n        if len(val) == 0:\n            return val\n        else:\n            return val[ix]\n\n    for ix, entry in enumerate(roidb):\n        if limit > 0 and ix >= limit:\n            break\n        if ix % 10 == 0:\n            print('{:d}/{:d}'.format(ix + 1, len(roidb)))\n\n        im = cv2.imread(entry['image'])\n        im_name = os.path.splitext(os.path.basename(entry['image']))[0]\n\n        cls_boxes_i = [\n            id_or_index(ix, cls_k_boxes) for cls_k_boxes in all_boxes\n        ]\n        cls_segms_i = [\n            id_or_index(ix, cls_k_segms) for cls_k_segms in all_segms\n        ]\n        cls_keyps_i = [\n            id_or_index(ix, cls_k_keyps) for cls_k_keyps in all_keyps\n        ]\n\n        vis_utils.vis_one_image(\n            im[:, :, ::-1],\n            '{:d}_{:s}'.format(ix, im_name),\n            os.path.join(output_dir, 'vis'),\n            cls_boxes_i,\n            segms=cls_segms_i,\n            keypoints=cls_keyps_i,\n            thresh=thresh,\n            box_alpha=0.8,\n            dataset=ds,\n            show_class=True\n        )\n\n\nif __name__ == '__main__':\n    opts = parse_args()\n    vis(\n        opts.dataset,\n      
  opts.detections,\n        opts.thresh,\n        opts.output_dir,\n        limit=opts.first\n    )\n"
  }
]