Repository: facebookresearch/Detectron Branch: main Commit: 04155a01a6ea Files: 210 Total size: 1008.7 KB Directory structure: gitextract_5vn9xfb_/ ├── .github/ │ └── issue_template.md ├── .gitignore ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── FAQ.md ├── GETTING_STARTED.md ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── Makefile ├── NOTICE ├── README.md ├── cmake/ │ ├── Summary.cmake │ └── legacy/ │ ├── Cuda.cmake │ ├── Dependencies.cmake │ ├── Modules/ │ │ └── FindCuDNN.cmake │ ├── Summary.cmake │ ├── Utils.cmake │ └── legacymake.cmake ├── configs/ │ ├── 04_2018_gn_baselines/ │ │ ├── e2e_mask_rcnn_R-101-FPN_2x_gn.yaml │ │ ├── e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ │ ├── e2e_mask_rcnn_R-50-FPN_2x_gn.yaml │ │ ├── e2e_mask_rcnn_R-50-FPN_3x_gn.yaml │ │ ├── mask_rcnn_R-50-FPN_1x_gn.yaml │ │ ├── scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ │ └── scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml │ ├── 12_2017_baselines/ │ │ ├── e2e_faster_rcnn_R-101-FPN_1x.yaml │ │ ├── e2e_faster_rcnn_R-101-FPN_2x.yaml │ │ ├── e2e_faster_rcnn_R-50-C4_1x.yaml │ │ ├── e2e_faster_rcnn_R-50-C4_2x.yaml │ │ ├── e2e_faster_rcnn_R-50-FPN_1x.yaml │ │ ├── e2e_faster_rcnn_R-50-FPN_2x.yaml │ │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml │ │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml │ │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml │ │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml │ │ ├── e2e_keypoint_rcnn_R-101-FPN_1x.yaml │ │ ├── e2e_keypoint_rcnn_R-101-FPN_s1x.yaml │ │ ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml │ │ ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml │ │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml │ │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml │ │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml │ │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml │ │ ├── e2e_mask_rcnn_R-101-FPN_1x.yaml │ │ ├── e2e_mask_rcnn_R-101-FPN_2x.yaml │ │ ├── e2e_mask_rcnn_R-50-C4_1x.yaml │ │ ├── e2e_mask_rcnn_R-50-C4_2x.yaml │ │ ├── e2e_mask_rcnn_R-50-FPN_1x.yaml │ │ ├── e2e_mask_rcnn_R-50-FPN_2x.yaml │ │ ├── 
e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml │ │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml │ │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml │ │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml │ │ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml │ │ ├── fast_rcnn_R-101-FPN_1x.yaml │ │ ├── fast_rcnn_R-101-FPN_2x.yaml │ │ ├── fast_rcnn_R-50-C4_1x.yaml │ │ ├── fast_rcnn_R-50-C4_2x.yaml │ │ ├── fast_rcnn_R-50-FPN_1x.yaml │ │ ├── fast_rcnn_R-50-FPN_2x.yaml │ │ ├── fast_rcnn_X-101-32x8d-FPN_1x.yaml │ │ ├── fast_rcnn_X-101-32x8d-FPN_2x.yaml │ │ ├── fast_rcnn_X-101-64x4d-FPN_1x.yaml │ │ ├── fast_rcnn_X-101-64x4d-FPN_2x.yaml │ │ ├── keypoint_rcnn_R-101-FPN_1x.yaml │ │ ├── keypoint_rcnn_R-101-FPN_s1x.yaml │ │ ├── keypoint_rcnn_R-50-FPN_1x.yaml │ │ ├── keypoint_rcnn_R-50-FPN_s1x.yaml │ │ ├── keypoint_rcnn_X-101-32x8d-FPN_1x.yaml │ │ ├── keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml │ │ ├── keypoint_rcnn_X-101-64x4d-FPN_1x.yaml │ │ ├── keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml │ │ ├── mask_rcnn_R-101-FPN_1x.yaml │ │ ├── mask_rcnn_R-101-FPN_2x.yaml │ │ ├── mask_rcnn_R-50-C4_1x.yaml │ │ ├── mask_rcnn_R-50-C4_2x.yaml │ │ ├── mask_rcnn_R-50-FPN_1x.yaml │ │ ├── mask_rcnn_R-50-FPN_2x.yaml │ │ ├── mask_rcnn_X-101-32x8d-FPN_1x.yaml │ │ ├── mask_rcnn_X-101-32x8d-FPN_2x.yaml │ │ ├── mask_rcnn_X-101-64x4d-FPN_1x.yaml │ │ ├── mask_rcnn_X-101-64x4d-FPN_2x.yaml │ │ ├── retinanet_R-101-FPN_1x.yaml │ │ ├── retinanet_R-101-FPN_2x.yaml │ │ ├── retinanet_R-50-FPN_1x.yaml │ │ ├── retinanet_R-50-FPN_2x.yaml │ │ ├── retinanet_X-101-32x8d-FPN_1x.yaml │ │ ├── retinanet_X-101-32x8d-FPN_2x.yaml │ │ ├── retinanet_X-101-64x4d-FPN_1x.yaml │ │ ├── retinanet_X-101-64x4d-FPN_2x.yaml │ │ ├── rpn_R-101-FPN_1x.yaml │ │ ├── rpn_R-50-C4_1x.yaml │ │ ├── rpn_R-50-FPN_1x.yaml │ │ ├── rpn_X-101-32x8d-FPN_1x.yaml │ │ ├── rpn_X-101-64x4d-FPN_1x.yaml │ │ ├── rpn_person_only_R-101-FPN_1x.yaml │ │ ├── rpn_person_only_R-50-FPN_1x.yaml │ │ ├── rpn_person_only_X-101-32x8d-FPN_1x.yaml │ │ └── rpn_person_only_X-101-64x4d-FPN_1x.yaml │ ├── 
getting_started/ │ │ ├── tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml │ │ ├── tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml │ │ ├── tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml │ │ └── tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml │ └── test_time_aug/ │ ├── e2e_mask_rcnn_R-50-FPN_2x.yaml │ └── keypoint_rcnn_R-50-FPN_1x.yaml ├── demo/ │ └── NOTICE ├── detectron/ │ ├── __init__.py │ ├── core/ │ │ ├── __init__.py │ │ ├── config.py │ │ ├── rpn_generator.py │ │ ├── test.py │ │ ├── test_engine.py │ │ └── test_retinanet.py │ ├── datasets/ │ │ ├── VOCdevkit-matlab-wrapper/ │ │ │ ├── get_voc_opts.m │ │ │ ├── voc_eval.m │ │ │ └── xVOCap.m │ │ ├── __init__.py │ │ ├── cityscapes_json_dataset_evaluator.py │ │ ├── coco_to_cityscapes_id.py │ │ ├── data/ │ │ │ └── README.md │ │ ├── dataset_catalog.py │ │ ├── dummy_datasets.py │ │ ├── json_dataset.py │ │ ├── json_dataset_evaluator.py │ │ ├── roidb.py │ │ ├── task_evaluation.py │ │ ├── voc_dataset_evaluator.py │ │ └── voc_eval.py │ ├── modeling/ │ │ ├── FPN.py │ │ ├── ResNet.py │ │ ├── VGG16.py │ │ ├── VGG_CNN_M_1024.py │ │ ├── __init__.py │ │ ├── detector.py │ │ ├── fast_rcnn_heads.py │ │ ├── generate_anchors.py │ │ ├── keypoint_rcnn_heads.py │ │ ├── mask_rcnn_heads.py │ │ ├── model_builder.py │ │ ├── name_compat.py │ │ ├── optimizer.py │ │ ├── retinanet_heads.py │ │ ├── rfcn_heads.py │ │ └── rpn_heads.py │ ├── ops/ │ │ ├── __init__.py │ │ ├── collect_and_distribute_fpn_rpn_proposals.py │ │ ├── generate_proposal_labels.py │ │ ├── generate_proposals.py │ │ ├── zero_even_op.cc │ │ ├── zero_even_op.cu │ │ └── zero_even_op.h │ ├── roi_data/ │ │ ├── __init__.py │ │ ├── data_utils.py │ │ ├── fast_rcnn.py │ │ ├── keypoint_rcnn.py │ │ ├── loader.py │ │ ├── mask_rcnn.py │ │ ├── minibatch.py │ │ ├── retinanet.py │ │ └── rpn.py │ ├── tests/ │ │ ├── data_loader_benchmark.py │ │ ├── test_batch_permutation_op.py │ │ ├── test_bbox_transform.py │ │ ├── test_cfg.py │ │ ├── test_loader.py │ │ ├── test_restore_checkpoint.py │ │ ├── 
test_smooth_l1_loss_op.py │ │ ├── test_spatial_narrow_as_op.py │ │ └── test_zero_even_op.py │ └── utils/ │ ├── __init__.py │ ├── blob.py │ ├── boxes.py │ ├── c2.py │ ├── collections.py │ ├── colormap.py │ ├── coordinator.py │ ├── cython_bbox.pyx │ ├── cython_nms.pyx │ ├── env.py │ ├── image.py │ ├── io.py │ ├── keypoints.py │ ├── logging.py │ ├── lr_policy.py │ ├── model_convert_utils.py │ ├── net.py │ ├── segms.py │ ├── subprocess.py │ ├── timer.py │ ├── train.py │ ├── training_stats.py │ └── vis.py ├── docker/ │ └── Dockerfile ├── projects/ │ └── GN/ │ └── README.md ├── requirements.txt ├── setup.py └── tools/ ├── convert_cityscapes_to_coco.py ├── convert_coco_model_to_cityscapes.py ├── convert_pkl_to_pb.py ├── convert_selective_search.py ├── generate_testdev_from_test.py ├── infer.py ├── infer_simple.py ├── pickle_caffe_blobs.py ├── reval.py ├── test_net.py ├── train_net.py └── visualize_results.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/issue_template.md ================================================ ## PLEASE FOLLOW THESE INSTRUCTIONS BEFORE POSTING 1. Please thoroughly read README.md, INSTALL.md, GETTING_STARTED.md, and FAQ.md 2. Please search existing *open and closed* issues in case your issue has already been reported 3. Please try to debug the issue in case you can solve it on your own before posting ## After following steps 1-3 above and agreeing to provide the detailed information requested below, you may continue with posting your issue (**Delete this line and the text above it.**) ### Expected results What did you expect to see? ### Actual results What did you observe instead? ### Detailed steps to reproduce E.g.: ``` The command that you ran ``` ### System information * Operating system: ? * Compiler version: ? * CUDA version: ? * cuDNN version: ? * NVIDIA driver version: ? 
* GPU models (for all devices if they are not all the same): ? * `PYTHONPATH` environment variable: ? * `python --version` output: ? * Anything else that seems relevant: ? ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # Shared objects *.so # Distribution / packaging build/ *.egg-info/ *.egg # Temporary files *.swn *.swo *.swp # Dataset symlinks detectron/datasets/data/* !detectron/datasets/data/README.md # Generated C files detectron/utils/cython_*.c ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR) # Find the Caffe2 package. # Caffe2 exports the required targets, so find_package should work for # the standard Caffe2 installation. If you encounter problems with finding # the Caffe2 package, make sure you have run `make install` when installing # Caffe2 (`make install` populates your share/cmake/Caffe2). find_package(Caffe2 REQUIRED) if (${CAFFE2_VERSION} VERSION_LESS 0.8.2) # Pre-0.8.2 caffe2 does not have proper interface libraries set up, so we # will rely on the old path. message(WARNING "You are using an older version of Caffe2 (version " ${CAFFE2_VERSION} "). Please consider moving to a newer version.") include(cmake/legacy/legacymake.cmake) return() endif() # Add compiler flags. set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC -Wno-narrowing") # Print configuration summary. include(cmake/Summary.cmake) detectron_print_config_summary() # Collect custom ops sources. file(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cc) file(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cu) # Install custom CPU ops lib. 
add_library( caffe2_detectron_custom_ops SHARED ${CUSTOM_OPS_CPU_SRCS}) target_include_directories( caffe2_detectron_custom_ops PRIVATE ${CAFFE2_INCLUDE_DIRS}) target_link_libraries(caffe2_detectron_custom_ops caffe2_library) install(TARGETS caffe2_detectron_custom_ops DESTINATION lib) # Install custom GPU ops lib, if gpu is present. if (CAFFE2_USE_CUDA OR CAFFE2_FOUND_CUDA) # Additional -I prefix is required for CMake versions before commit (< 3.7): # https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594 list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS}) CUDA_ADD_LIBRARY( caffe2_detectron_custom_ops_gpu SHARED ${CUSTOM_OPS_CPU_SRCS} ${CUSTOM_OPS_GPU_SRCS}) target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu_library) install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib) endif() ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at . 
All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to Detectron We want to make contributing to this project as easy and transparent as possible. ## Our Development Process Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis. ## Pull Requests We actively welcome your pull requests. 1. Fork the repo and create your branch from `master`. 2. If you've added code that should be tested, add tests. 3. If you've changed APIs, update the documentation. 4. Ensure the test suite passes. 5. Make sure your code lints. 6. Ensure no regressions in baseline model speed and accuracy. 7. If you haven't already, complete the Contributor License Agreement ("CLA"). ## Contributor License Agreement ("CLA") In order to accept your pull request, we need you to submit a CLA. You only need to do this once to work on any of Facebook's open source projects. 
Complete your CLA here: ## Issues GitHub issues will be largely unattended and are mainly intended as a community forum for collectively debugging issues, hopefully leading to pull requests with fixes when appropriate. ## Coding Style * 4 spaces for indentation rather than tabs * 80 character line length * PEP8 formatting ## License By contributing to Detectron, you agree that your contributions will be licensed under the LICENSE file in the root directory of this source tree. ================================================ FILE: FAQ.md ================================================ # FAQ This document covers frequently asked questions. - For general information about Detectron, please see [`README.md`](README.md). - For installation instructions, please see [`INSTALL.md`](INSTALL.md). - For a quick getting started guide, please see [`GETTING_STARTED.md`](GETTING_STARTED.md). #### Q: How do I compute validation AP during training? **A:** Detectron does not compute validation statistics (e.g., AP) during training because this slows training. Instead, we've implemented a "validation monitor", which is a process that polls for new model checkpoints saved by a training job and when one is found performs inference with it by scheduling a job with `tools/test_net.py` asynchronously using free GPUs in our cluster. We have not released the validation monitor because (1) it's a relatively thin wrapper on top of `tools/train_net.py` and (2) the little code that comprises it is specific to our cluster and would not be generally useful. #### Q: How do I restrict Detectron to use only a subset of the GPUs on a server? **A:** Don't modify the code; use the [`CUDA_VISIBLE_DEVICES`](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) environment variable instead. #### Q: Detection on one image is really slow compared to the reported performance, why? **A:** Various algorithms and caches (e.g., from `cudnn`) take some time to warm up.
Peak inference performance will not be reached until after a few images have been processed. Also potentially relevant: inference with Mask R-CNN on high-resolution images may be slow simply because substantial time is spent upsampling the predicted masks to the original image resolution (this has not been optimized). You can diagnose this issue if the `misc_mask` time reported by `tools/infer_simple.py` is high (e.g., much more than 20-90ms). The solution is to first resize your images such that the short side is around 600-800px (the exact choice does not matter) and then run inference on the resized image. #### Q: How do I implement a custom Caffe2 CPU or GPU operator for use in Detectron? **A:** Detectron uses a number of specialized Caffe2 operators that are distributed via the [Caffe2 Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron) as part of the core Caffe2 GitHub repository. If you'd like to implement a custom Caffe2 operator for your project, we have written a toy example illustrating how to add an operator under the Detectron source tree; please see [`detectron/ops/zero_even_op.*`](detectron/ops/) and [`detectron/tests/test_zero_even_op.py`](detectron/tests/test_zero_even_op.py). For more background on writing Caffe2 operators please consult the [Caffe2 documentation](https://caffe2.ai/docs/custom-operators.html). #### Q: How do I use Detectron to train a model on a custom dataset? **A:** If possible, we strongly recommend that you first convert the custom dataset annotation format to the [COCO API json format](http://cocodataset.org/#download). Then, add your dataset to the [dataset catalog](detectron/datasets/dataset_catalog.py) so that Detectron can use it for training and inference. If your dataset cannot be converted to the COCO API json format, then it's likely that more significant code modifications will be required. 
If the dataset you're adding is popular, please consider making the converted annotations publicly available; if code modifications are required, please consider submitting a pull request. ================================================ FILE: GETTING_STARTED.md ================================================ # Using Detectron This document provides brief tutorials covering Detectron for inference and training on the COCO dataset. - For general information about Detectron, please see [`README.md`](README.md). - For installation instructions, please see [`INSTALL.md`](INSTALL.md). ## Inference with Pretrained Models #### 1. Directory of Image Files To run inference on a directory of image files (`demo/*.jpg` in this example), you can use the `infer_simple.py` tool. In this example, we're using an end-to-end trained Mask R-CNN model with a ResNet-101-FPN backbone from the model zoo: ``` python tools/infer_simple.py \ --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \ --output-dir /tmp/detectron-visualizations \ --image-ext jpg \ --wts https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \ demo ``` Detectron should automatically download the model from the URL specified by the `--wts` argument. This tool will output visualizations of the detections in PDF format in the directory specified by `--output-dir`. Here's an example of the output you should expect to see (for copyright information about the demo images see [`demo/NOTICE`](demo/NOTICE)).

Example Mask R-CNN output.

**Notes:** - When running inference on your own high-resolution images, Mask R-CNN may be slow simply because substantial time is spent upsampling the predicted masks to the original image resolution (this has not been optimized). You can diagnose this issue if the `misc_mask` time reported by `tools/infer_simple.py` is high (e.g., much more than 20-90ms). The solution is to first resize your images such that the short side is around 600-800px (the exact choice does not matter) and then run inference on the resized image. #### 2. COCO Dataset This example shows how to run an end-to-end trained Mask R-CNN model from the model zoo using a single GPU for inference. As configured, this will run inference on all images in `coco_2014_minival` (which must be properly installed). ``` python tools/test_net.py \ --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \ TEST.WEIGHTS https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \ NUM_GPUS 1 ``` Running inference with the same model using `$N` GPUs (e.g., `N=8`). ``` python tools/test_net.py \ --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml \ --multi-gpu-testing \ TEST.WEIGHTS https://dl.fbaipublicfiles.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl \ NUM_GPUS $N ``` On an NVIDIA Tesla P100 GPU, inference should take about 130-140 ms per image for this example. ## Training a Model with Detectron This is a tiny tutorial showing how to train a model on COCO. The model will be an end-to-end trained Faster R-CNN using a ResNet-50-FPN backbone. For the purpose of this tutorial, we'll use a short training schedule and a small input image size so that training and inference will be relatively fast. 
As a result, the box AP on COCO will be relatively low compared to our [baselines](MODEL_ZOO.md). This example is provided for instructive purposes only (i.e., not for comparing against publications). #### 1. Training with 1 GPU ``` python tools/train_net.py \ --cfg configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml \ OUTPUT_DIR /tmp/detectron-output ``` **Expected results:** - Output (models, validation set detections, etc.) will be saved under `/tmp/detectron-output` - On a Maxwell generation GPU (e.g., M40), training should take around 4.2 hours - Inference time should be around 80ms / image (also on an M40) - Box AP on `coco_2014_minival` should be around 22.1% (+/- 0.1% stdev measured over 3 runs) #### 2. Multi-GPU Training We've also provided configs to illustrate training with 2, 4, and 8 GPUs using learning schedules that will be approximately equivalent to the one used with 1 GPU above. The configs are located at: `configs/getting_started/tutorial_{2,4,8}gpu_e2e_faster_rcnn_R-50-FPN.yaml`. For example, launching a training job with 2 GPUs will look like this: ``` python tools/train_net.py \ --multi-gpu-testing \ --cfg configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml \ OUTPUT_DIR /tmp/detectron-output ``` Note that we've also added the `--multi-gpu-testing` flag to instruct Detectron to parallelize inference over multiple GPUs (2 in this example; see `NUM_GPUS` in the config file) after training has finished. **Expected results:** - Training should take around 2.3 hours (2 x M40) - Inference time should be around 80ms / image (but in parallel on 2 GPUs, so half the total time) - Box AP on `coco_2014_minival` should be around 22.1% (+/- 0.1% stdev measured over 3 runs) To understand how learning schedules are adjusted (the "linear scaling rule"), please study these tutorial config files and read our paper [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677).
**Aside from this tutorial, all of our released configs make use of 8 GPUs. If you will be using fewer than 8 GPUs for training (or do anything else that changes the minibatch size), it is essential that you understand how to manipulate training schedules according to the linear scaling rule.** **Notes:** - This training example uses a relatively low GPU-compute model and thus overhead from Caffe2 Python ops is relatively high. As a result, scaling as the number of GPUs is increased from 2 to 8 is relatively poor (e.g., training with 8 GPUs takes about 0.9 hours, only 4.5x faster than with 1 GPU). As larger, more GPU-compute heavy models are used, the scaling improves. ================================================ FILE: INSTALL.md ================================================ # Installing Detectron This document covers how to install Detectron, its dependencies (including Caffe2), and the COCO dataset. - For general information about Detectron, please see [`README.md`](README.md). **Requirements:** - NVIDIA GPU, Linux, Python2 - Caffe2, various standard Python packages, and the COCO API; instructions for installing these dependencies are found below **Notes:** - Detectron operators currently do not have CPU implementations; a GPU system is required. - Detectron has been tested extensively with CUDA 8.0 and cuDNN 6.0.21. ## Caffe2 To install Caffe2 with CUDA support, follow the [installation instructions](https://caffe2.ai/docs/getting-started.html) from the [Caffe2 website](https://caffe2.ai/). **If you already have Caffe2 installed, make sure to update your Caffe2 to a version that includes the [Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron).** Please ensure that your Caffe2 installation was successful before proceeding by running the following commands and checking their output as directed in the comments.
``` # To check if Caffe2 build was successful python -c 'from caffe2.python import core' 2>/dev/null && echo "Success" || echo "Failure" # To check if Caffe2 GPU build was successful # This must print a number > 0 in order to use Detectron python -c 'from caffe2.python import workspace; print(workspace.NumCudaDevices())' ``` If the `caffe2` Python package is not found, you likely need to adjust your `PYTHONPATH` environment variable to include its location (`/path/to/caffe2/build`, where `build` is the Caffe2 CMake build directory). ## Other Dependencies Install the [COCO API](https://github.com/cocodataset/cocoapi): ``` # COCOAPI=/path/to/clone/cocoapi git clone https://github.com/cocodataset/cocoapi.git $COCOAPI cd $COCOAPI/PythonAPI # Install into global site-packages make install # Alternatively, if you do not have permissions or prefer # not to install the COCO API into global site-packages python setup.py install --user ``` Note that instructions like `# COCOAPI=/path/to/clone/cocoapi` indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (`COCOAPI` in this case) accordingly. ## Detectron Clone the Detectron repository: ``` # DETECTRON=/path/to/clone/detectron git clone https://github.com/facebookresearch/detectron $DETECTRON ``` Install Python dependencies: ``` pip install -r $DETECTRON/requirements.txt ``` Set up Python modules: ``` cd $DETECTRON && make ``` Check that Detectron tests pass (e.g. for [`SpatialNarrowAsOp test`](detectron/tests/test_spatial_narrow_as_op.py)): ``` python $DETECTRON/detectron/tests/test_spatial_narrow_as_op.py ``` ## That's All You Need for Inference At this point, you can run inference using pretrained Detectron models. Take a look at our [inference tutorial](GETTING_STARTED.md) for an example. If you want to train models on the COCO dataset, then please continue with the installation instructions.
## Datasets Detectron finds datasets via symlinks from `detectron/datasets/data` to the actual locations where the dataset images and annotations are stored. For instructions on how to create symlinks for COCO and other datasets, please see [`detectron/datasets/data/README.md`](detectron/datasets/data/README.md). After symlinks have been created, that's all you need to start training models. ## Advanced Topic: Custom Operators for New Research Projects Please read the custom operators section of the [`FAQ`](FAQ.md) first. For convenience, we provide CMake support for building custom operators. All custom operators are built into a single library that can be loaded dynamically from Python. Place your custom operator implementation under [`detectron/ops/`](detectron/ops/) and see [`detectron/tests/test_zero_even_op.py`](detectron/tests/test_zero_even_op.py) for an example of how to load custom operators from Python. Build the custom operators library: ``` cd $DETECTRON && make ops ``` Check that the custom operator tests pass: ``` python $DETECTRON/detectron/tests/test_zero_even_op.py ``` ## Docker Image We provide a [`Dockerfile`](docker/Dockerfile) that you can use to build a Detectron image on top of a Caffe2 image that satisfies the requirements outlined at the top. If you would like to use a Caffe2 image different from the one we use by default, please make sure that it includes the [Detectron module](https://github.com/pytorch/pytorch/tree/master/modules/detectron). Build the image: ``` cd $DETECTRON/docker docker build -t detectron:c2-cuda9-cudnn7 . ``` Run the image (e.g. 
for [`BatchPermutationOp test`](detectron/tests/test_batch_permutation_op.py)): ``` nvidia-docker run --rm -it detectron:c2-cuda9-cudnn7 python detectron/tests/test_batch_permutation_op.py ``` ## Troubleshooting In case of Caffe2 installation problems, please read the troubleshooting section of the relevant Caffe2 [installation instructions](https://caffe2.ai/docs/getting-started.html) first. In the following, we provide additional troubleshooting tips for Caffe2 and Detectron. ### Caffe2 Operator Profiling Caffe2 comes with performance [`profiling`](https://github.com/pytorch/pytorch/tree/master/caffe2/contrib/prof) support which you may find useful for benchmarking or debugging your operators (see [`BatchPermutationOp test`](detectron/tests/test_batch_permutation_op.py) for example usage). Profiling support is not built by default and you can enable it by setting the `-DUSE_PROF=ON` flag when running Caffe2 CMake. ### CMake Cannot Find CUDA and cuDNN Sometimes CMake has trouble finding the CUDA and cuDNN directories on your machine. When building Caffe2, you can point CMake to the CUDA and cuDNN directories by running: ``` cmake .. \ # insert your Caffe2 CMake flags here -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda/toolkit/dir \ -DCUDNN_ROOT_DIR=/path/to/cudnn/root/dir ``` Similarly, when building custom Detectron operators you can use: ``` cd $DETECTRON mkdir -p build && cd build cmake .. \ -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda/toolkit/dir \ -DCUDNN_ROOT_DIR=/path/to/cudnn/root/dir make ``` Note that you can use the same commands to get CMake to use specific versions of CUDA and cuDNN out of possibly multiple versions installed on your machine. ### Protobuf Errors Caffe2 uses protobuf as its serialization format and requires version `3.2.0` or newer. If your protobuf version is older, you can build protobuf from the Caffe2 protobuf submodule and use that version instead.
To build the Caffe2 protobuf submodule: ``` # CAFFE2=/path/to/caffe2 cd $CAFFE2/third_party/protobuf/cmake mkdir -p build && cd build cmake .. \ -DCMAKE_INSTALL_PREFIX=$HOME/c2_tp_protobuf \ -Dprotobuf_BUILD_TESTS=OFF \ -DCMAKE_CXX_FLAGS="-fPIC" make install ``` To point Caffe2 CMake to the newly built protobuf: ``` cmake .. \ # insert your Caffe2 CMake flags here -DPROTOBUF_PROTOC_EXECUTABLE=$HOME/c2_tp_protobuf/bin/protoc \ -DPROTOBUF_INCLUDE_DIR=$HOME/c2_tp_protobuf/include \ -DPROTOBUF_LIBRARY=$HOME/c2_tp_protobuf/lib64/libprotobuf.a ``` You may also experience problems with protobuf if you have both system and anaconda packages installed. This could lead to problems as the versions could be mixed at compile time or at runtime. This issue can also be overcome by following the commands from above. ### Caffe2 Python Binaries In case you experience issues with CMake being unable to find the required Python paths when building Caffe2 Python binaries (e.g., in a virtualenv), you can try pointing Caffe2 CMake to the Python library and include directories by using: ``` cmake .. \ # insert your Caffe2 CMake flags here -DPYTHON_LIBRARY=$(python -c "from distutils import sysconfig; print(sysconfig.get_python_lib())") \ -DPYTHON_INCLUDE_DIR=$(python -c "from distutils import sysconfig; print(sysconfig.get_python_inc())") ``` ### Caffe2 with NNPACK Build Detectron does not require Caffe2 built with NNPACK support. If you face NNPACK related issues during Caffe2 installation, you can safely disable NNPACK by setting the `-DUSE_NNPACK=OFF` CMake flag. ### Caffe2 with OpenCV Build Analogously to the NNPACK case above, you can disable OpenCV by setting the `-DUSE_OPENCV=OFF` CMake flag. ### COCO API Undefined Symbol Error If you encounter a COCO API import error due to an undefined symbol, as reported [here](https://github.com/cocodataset/cocoapi/issues/35), make sure that your Python versions are not getting mixed.
For instance, this issue may arise if you have [both system and conda numpy installed](https://stackoverflow.com/questions/36190757/numpy-undefined-symbol-pyfpe-jbuf). ### CMake Cannot Find Caffe2 In case you experience issues with CMake being unable to find the Caffe2 package when building custom operators, make sure you have run `make install` as part of your Caffe2 installation process. ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MODEL_ZOO.md ================================================ # Detectron Model Zoo and Baselines ## Introduction This file documents a large collection of baselines trained with Detectron, primarily in late December 2017. We refer to these results as the *12_2017_baselines*. All configurations for these baselines are located in the `configs/12_2017_baselines` directory. The tables below provide results and useful statistics about training and inference. Links to the trained models as well as their output are provided. Unless noted differently below (see "Notes" under each table), the following common settings are used for all training and inference runs. #### Common Settings and Notes - All baselines were run on [Big Basin](https://code.facebook.com/posts/1835166200089399/introducing-big-basin) servers with 8 NVIDIA Tesla P100 GPU accelerators (with 16GB GPU memory, CUDA 8.0, and cuDNN 6.0.21). - All baselines were trained using 8 GPU data parallel sync SGD with a minibatch size of either 8 or 16 images (see the *im/gpu* column). - For training, only horizontal flipping data augmentation was used. - For inference, no test-time augmentations (e.g., multiple scales, flipping) were used. - All models were trained on the union of `coco_2014_train` and `coco_2014_valminusminival`, which is exactly equivalent to the recently defined `coco_2017_train` dataset. 
- All models were tested on the `coco_2014_minival` dataset, which is exactly equivalent to the recently defined `coco_2017_val` dataset. - Inference times are often expressed as "*X* + *Y*", in which *X* is time taken in reasonably well-optimized GPU code and *Y* is time taken in unoptimized CPU code. (The CPU code time could be reduced substantially with additional engineering.) - Inference results for boxes, masks, and keypoints ("kps") are provided in the [COCO json format](http://cocodataset.org/#format-data). - The *model id* column is provided for ease of reference. - To check downloaded file integrity: for any download URL on this page, simply append `.md5sum` to the URL to download the file's md5 hash. - All models and results below are on the [COCO dataset](http://cocodataset.org). - Baseline models and results for the [Cityscapes dataset](https://www.cityscapes-dataset.com/) are coming soon! #### Training Schedules We use three training schedules, indicated by the *lr schd* column in the tables below. - **1x**: For minibatch size 16, this schedule starts at a LR of 0.02, multiplies the LR by 0.1 after 60k and again after 80k iterations, and finally terminates at 90k iterations. This schedule results in 12.17 epochs over the 118,287 images in `coco_2014_train` union `coco_2014_valminusminival` (or equivalently, `coco_2017_train`). - **2x**: Twice as long as the 1x schedule with the LR change points scaled proportionally. - **s1x** ("stretched 1x"): This schedule scales the 1x schedule by roughly 1.44x, but also extends the duration of the first learning rate. With a minibatch size of 16, it multiplies the LR by 0.1 at 100k and again at 120k iterations, finally ending after 130k iterations. All training schedules also use a 500 iteration linear learning rate warm up.
When changing the minibatch size between 8 and 16 images, we adjust the number of SGD iterations and the base learning rate according to the principles outlined in our paper [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677). #### License All models available for download through this document are licensed under the [Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/). #### ImageNet Pretrained Models The backbone models pretrained on ImageNet are available in the format used by Detectron. Unless otherwise noted, these models are trained on the standard ImageNet-1k dataset. - [R-50.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl): converted copy of MSRA's original ResNet-50 model - [R-101.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl): converted copy of MSRA's original ResNet-101 model - [X-101-64x4d.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl): converted copy of FB's original ResNeXt-101-64x4d model trained with Torch7 - [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB - [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl): ResNeXt-152-32x8d model **trained on ImageNet-5k** with Caffe2 at FB (see our [ResNeXt paper](https://arxiv.org/abs/1611.05431) for details on ImageNet-5k) #### Log Files [Training and inference logs](https://dl.fbaipublicfiles.com/detectron/logs/model_zoo_12_2017_baseline_logs.tgz) are available for most models in the model zoo. ## Proposal, Box, and Mask Detection Baselines ### RPN Proposal Baselines
        backbone         type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
kp
AP
prop.
AR
model id download
links
R-50-C4 RPN 1x 2 4.3 0.187 4.7 0.113 - - - 51.6 35998355 model | props: 123
R-50-FPN RPN 1x 2 6.4 0.416 10.4 0.080 - - - 57.2 35998814 model | props: 123
R-101-FPN RPN 1x 2 8.1 0.503 12.6 0.108 - - - 58.2 35998887 model | props: 123
X-101-64x4d-FPN RPN 1x 2 11.5 1.395 34.9 0.292 - - - 59.4 35998956 model | props: 123
X-101-32x8d-FPN RPN 1x 2 11.6 1.102 27.6 0.222 - - - 59.5 36760102 model | props: 123
**Notes:** - Inference time only includes RPN proposal generation. - "prop. AR" is proposal average recall at 1000 proposals per image. - Proposal download links ("props"): "1" is `coco_2014_train`; "2" is `coco_2014_valminusminival`; and "3" is `coco_2014_minival`. ### Fast & Mask R-CNN Baselines Using Precomputed RPN Proposals
        backbone         type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
kp
AP
prop.
AR
model id download
links
R-50-C4 Fast 1x 1 6.0 0.456 22.8 0.241 + 0.003 34.4 - - - 36224013 model | boxes
R-50-C4 Fast 2x 1 6.0 0.453 45.3 0.241 + 0.003 35.6 - - - 36224046 model | boxes
R-50-FPN Fast 1x 2 6.0 0.285 7.1 0.076 + 0.004 36.4 - - - 36225147 model | boxes
R-50-FPN Fast 2x 2 6.0 0.287 14.4 0.077 + 0.004 36.8 - - - 36225249 model | boxes
R-101-FPN Fast 1x 2 7.7 0.448 11.2 0.102 + 0.003 38.5 - - - 36228880 model | boxes
R-101-FPN Fast 2x 2 7.7 0.449 22.5 0.103 + 0.004 39.0 - - - 36228933 model | boxes
X-101-64x4d-FPN Fast 1x 1 6.3 0.994 49.7 0.292 + 0.003 40.4 - - - 36226250 model | boxes
X-101-64x4d-FPN Fast 2x 1 6.3 0.980 98.0 0.291 + 0.003 39.8 - - - 36226326 model | boxes
X-101-32x8d-FPN Fast 1x 1 6.4 0.721 36.1 0.217 + 0.003 40.6 - - - 37119777 model | boxes
X-101-32x8d-FPN Fast 2x 1 6.4 0.720 72.0 0.217 + 0.003 39.7 - - - 37121469 model | boxes
R-50-C4 Mask 1x 1 6.4 0.466 23.3 0.252 + 0.020 35.5 31.3 - - 36224121 model | boxes | masks
R-50-C4 Mask 2x 1 6.4 0.464 46.4 0.253 + 0.019 36.9 32.5 - - 36224151 model | boxes | masks
R-50-FPN Mask 1x 2 7.9 0.377 9.4 0.082 + 0.019 37.3 33.7 - - 36225401 model | boxes | masks
R-50-FPN Mask 2x 2 7.9 0.377 18.9 0.083 + 0.018 37.7 34.0 - - 36225732 model | boxes | masks
R-101-FPN Mask 1x 2 9.6 0.539 13.5 0.111 + 0.018 39.4 35.6 - - 36229407 model | boxes | masks
R-101-FPN Mask 2x 2 9.6 0.537 26.9 0.109 + 0.016 40.0 35.9 - - 36229740 model | boxes | masks
X-101-64x4d-FPN Mask 1x 1 7.3 1.036 51.8 0.292 + 0.016 41.3 37.0 - - 36226382 model | boxes | masks
X-101-64x4d-FPN Mask 2x 1 7.3 1.035 103.5 0.292 + 0.014 41.1 36.6 - - 36672114 model | boxes | masks
X-101-32x8d-FPN Mask 1x 1 7.4 0.766 38.3 0.223 + 0.017 41.3 37.0 - - 37121516 model | boxes | masks
X-101-32x8d-FPN Mask 2x 1 7.4 0.765 76.5 0.222 + 0.014 40.7 36.3 - - 37121596 model | boxes | masks
**Notes:** - Each row uses precomputed RPN proposals from the corresponding table row above that uses the same backbone. - Inference time *excludes* proposal generation. ### End-to-End Faster & Mask R-CNN Baselines
        backbone         type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
kp
AP
prop.
AR
model id download
links
R-50-C4 Faster 1x 1 6.3 0.566 28.3 0.167 + 0.003 34.8 - - - 35857197 model | boxes
R-50-C4 Faster 2x 1 6.3 0.569 56.9 0.174 + 0.003 36.5 - - - 35857281 model | boxes
R-50-FPN Faster 1x 2 7.2 0.544 13.6 0.093 + 0.004 36.7 - - - 35857345 model | boxes
R-50-FPN Faster 2x 2 7.2 0.546 27.3 0.092 + 0.004 37.9 - - - 35857389 model | boxes
R-101-FPN Faster 1x 2 8.9 0.647 16.2 0.120 + 0.004 39.4 - - - 35857890 model | boxes
R-101-FPN Faster 2x 2 8.9 0.647 32.4 0.119 + 0.004 39.8 - - - 35857952 model | boxes
X-101-64x4d-FPN Faster 1x 1 6.9 1.057 52.9 0.305 + 0.003 41.5 - - - 35858015 model | boxes
X-101-64x4d-FPN Faster 2x 1 6.9 1.055 105.5 0.304 + 0.003 40.8 - - - 35858198 model | boxes
X-101-32x8d-FPN Faster 1x 1 7.0 0.799 40.0 0.233 + 0.004 41.3 - - - 36761737 model | boxes
X-101-32x8d-FPN Faster 2x 1 7.0 0.800 80.0 0.233 + 0.003 40.6 - - - 36761786 model | boxes
R-50-C4 Mask 1x 1 6.6 0.620 31.0 0.181 + 0.018 35.8 31.4 - - 35858791 model | boxes | masks
R-50-C4 Mask 2x 1 6.6 0.620 62.0 0.182 + 0.017 37.8 32.8 - - 35858828 model | boxes | masks
R-50-FPN Mask 1x 2 8.6 0.889 22.2 0.099 + 0.019 37.7 33.9 - - 35858933 model | boxes | masks
R-50-FPN Mask 2x 2 8.6 0.897 44.9 0.099 + 0.018 38.6 34.5 - - 35859007 model | boxes | masks
R-101-FPN Mask 1x 2 10.2 1.008 25.2 0.126 + 0.018 40.0 35.9 - - 35861795 model | boxes | masks
R-101-FPN Mask 2x 2 10.2 0.993 49.7 0.126 + 0.017 40.9 36.4 - - 35861858 model | boxes | masks
X-101-64x4d-FPN Mask 1x 1 7.6 1.217 60.9 0.309 + 0.018 42.4 37.5 - - 36494496 model | boxes | masks
X-101-64x4d-FPN Mask 2x 1 7.6 1.210 121.0 0.309 + 0.015 42.2 37.2 - - 35859745 model | boxes | masks
X-101-32x8d-FPN Mask 1x 1 7.7 0.961 48.1 0.239 + 0.019 42.1 37.3 - - 36761843 model | boxes | masks
X-101-32x8d-FPN Mask 2x 1 7.7 0.975 97.5 0.240 + 0.016 41.7 36.9 - - 36762092 model | boxes | masks
**Notes:** - For these models, RPN and the detector are trained jointly and end-to-end. - Inference time is fully image-to-detections, *including* proposal generation. ### RetinaNet Baselines
        backbone         type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
kp
AP
prop.
AR
model id download
links
R-50-FPN RetinaNet 1x 2 6.8 0.483 12.1 0.125 35.7 - - - 36768636 model | boxes
R-50-FPN RetinaNet 2x 2 6.8 0.482 24.1 0.127 35.7 - - - 36768677 model | boxes
R-101-FPN RetinaNet 1x 2 8.7 0.666 16.7 0.156 37.7 - - - 36768744 model | boxes
R-101-FPN RetinaNet 2x 2 8.7 0.666 33.3 0.154 37.8 - - - 36768840 model | boxes
X-101-64x4d-FPN RetinaNet 1x 2 12.6 1.613 40.3 0.341 39.8 - - - 36768875 model | boxes
X-101-64x4d-FPN RetinaNet 2x 2 12.6 1.625 81.3 0.339 39.2 - - - 36768907 model | boxes
X-101-32x8d-FPN RetinaNet 1x 2 12.7 1.343 33.6 0.277 39.5 - - - 36769563 model | boxes
X-101-32x8d-FPN RetinaNet 2x 2 12.7 1.340 67.0 0.276 38.6 - - - 36769641 model | boxes
**Notes:** none ### Mask R-CNN with Bells & Whistles
        backbone         type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
kp
AP
prop.
AR
model id download
links
X-152-32x8d-FPN-IN5k Mask s1x 1 9.6 1.188 85.8 12.100 + 0.046 48.1 41.5 - - 37129812 model | boxes | masks
[above without test-time aug.] 0.325 + 0.018 45.2 39.7 - -
**Notes:** - A deeper backbone architecture is used: ResNeXt-**152**-32x8d-FPN - The backbone ResNeXt-152-32x8d model was trained on ImageNet-**5k** (not the usual ImageNet-1k) - Training uses multi-scale jitter over scales {640, 672, 704, 736, 768, 800} - Row 1: test-time augmentations are multi-scale testing over {400, 500, 600, 700, 900, 1000, 1100, 1200} and horizontal flipping (on each scale) - Row 2: same model as row 1, but without any test-time augmentation (i.e., same as the common baseline configuration) - Like the other results, this is a single model result (it is not an ensemble of models) ## Keypoint Detection Baselines #### Common Settings for Keypoint Detection Baselines (That Differ from Boxes and Masks) Our keypoint detection baselines differ from our box and mask baselines in a couple of details: - Due to less training data for the keypoint detection task compared with boxes and masks, we enable multi-scale jitter during training for all keypoint detection models. (Testing is still without any test-time augmentations by default.) - Models are trained only on images from `coco_2014_train` union `coco_2014_valminusminival` that contain at least one person with keypoint annotations (all other images are discarded from the training set). - Metrics are reported for the person class only (still run on the entire `coco_2014_minival` dataset). ### Person-Specific RPN Baselines
        backbone         type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box AP mask AP kp AP prop. AR model id download
links
R-50-FPN RPN 1x 2 6.4 0.391 9.8 0.082 - - - 64.0 35998996 model | props: 123
R-101-FPN RPN 1x 2 8.1 0.504 12.6 0.109 - - - 65.2 35999521 model | props: 123
X-101-64x4d-FPN RPN 1x 2 11.5 1.394 34.9 0.289 - - - 65.9 35999553 model | props: 123
X-101-32x8d-FPN RPN 1x 2 11.6 1.104 27.6 0.224 - - - 66.2 36760438 model | props: 123
**Notes:** - *Metrics are for the person category only.* - Inference time only includes RPN proposal generation. - "prop. AR" is proposal average recall at 1000 proposals per image. - Proposal download links ("props"): "1" is `coco_2014_train`; "2" is `coco_2014_valminusminival`; and "3" is `coco_2014_minival`. These include all images, not just the ones with valid keypoint annotations. ### Keypoint-Only Mask R-CNN Baselines Using Precomputed RPN Proposals
        backbone         type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box AP mask AP kp AP prop. AR model id download
links
R-50-FPN Kps 1x 2 7.7 0.533 13.3 0.081 + 0.087 52.7 - 64.1 - 37651787 model | boxes | kps
R-50-FPN Kps s1x 2 7.7 0.533 19.2 0.080 + 0.085 53.4 - 65.5 - 37651887 model | boxes | kps
R-101-FPN Kps 1x 2 9.4 0.668 16.7 0.109 + 0.080 53.5 - 65.0 - 37651996 model | boxes | kps
R-101-FPN Kps s1x 2 9.4 0.668 24.1 0.108 + 0.076 54.6 - 66.0 - 37652016 model | boxes | kps
X-101-64x4d-FPN Kps 1x 2 12.8 1.477 36.9 0.288 + 0.077 55.8 - 66.7 - 37731079 model | boxes | kps
X-101-64x4d-FPN Kps s1x 2 12.9 1.478 53.4 0.286 + 0.075 56.3 - 67.1 - 37731142 model | boxes | kps
X-101-32x8d-FPN Kps 1x 2 12.9 1.215 30.4 0.219 + 0.084 55.4 - 66.2 - 37730253 model | boxes | kps
X-101-32x8d-FPN Kps s1x 2 12.9 1.214 43.8 0.218 + 0.071 55.9 - 67.0 - 37731010 model | boxes | kps
**Notes:** - *Metrics are for the person category only.* - Each row uses precomputed RPN proposals from the corresponding table row above that uses the same backbone. - Inference time *excludes* proposal generation. ### End-to-End Keypoint-Only Mask R-CNN Baselines
        backbone         type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box AP mask AP kp AP prop. AR model id download
links
R-50-FPN Kps 1x 2 9.0 0.832 20.8 0.097 + 0.092 53.6 - 64.2 - 37697547 model | boxes | kps
R-50-FPN Kps s1x 2 9.0 0.828 29.9 0.096 + 0.089 54.3 - 65.4 - 37697714 model | boxes | kps
R-101-FPN Kps 1x 2 10.6 0.923 23.1 0.124 + 0.084 54.5 - 64.8 - 37697946 model | boxes | kps
R-101-FPN Kps s1x 2 10.6 0.921 33.3 0.123 + 0.083 55.3 - 65.8 - 37698009 model | boxes | kps
X-101-64x4d-FPN Kps 1x 2 14.1 1.655 41.4 0.302 + 0.079 56.3 - 66.0 - 37732355 model | boxes | kps
X-101-64x4d-FPN Kps s1x 2 14.1 1.731 62.5 0.322 + 0.074 56.9 - 66.8 - 37732415 model | boxes | kps
X-101-32x8d-FPN Kps 1x 2 14.2 1.410 35.3 0.235 + 0.080 56.0 - 66.0 - 37792158 model | boxes | kps
X-101-32x8d-FPN Kps s1x 2 14.2 1.408 50.8 0.236 + 0.075 56.9 - 67.0 - 37732318 model | boxes | kps
**Notes:** - *Metrics are for the person category only.* - For these models, RPN and the detector are trained jointly and end-to-end. - Inference time is fully image-to-detections, *including* proposal generation. ================================================ FILE: Makefile ================================================ # Don't use the --user flag for setup.py develop mode with virtualenv. DEV_USER_FLAG=$(shell python -c "import sys; print('' if hasattr(sys, 'real_prefix') else '--user')") .PHONY: default default: dev .PHONY: install install: python setup.py install .PHONY: ops ops: mkdir -p build && cd build && cmake .. && make -j$(shell nproc) .PHONY: dev dev: python setup.py develop $(DEV_USER_FLAG) .PHONY: clean clean: python setup.py develop --uninstall $(DEV_USER_FLAG) rm -rf build ================================================ FILE: NOTICE ================================================ Portions of this software are derived from py-faster-rcnn. ============================================================================== py-faster-rcnn licence ============================================================================== Faster R-CNN The MIT License (MIT) Copyright (c) 2015 Microsoft Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ **Detectron is deprecated. Please see [detectron2](https://github.com/facebookresearch/detectron2), a ground-up rewrite of Detectron in PyTorch.** # Detectron Detectron is Facebook AI Research's software system that implements state-of-the-art object detection algorithms, including [Mask R-CNN](https://arxiv.org/abs/1703.06870). It is written in Python and powered by the [Caffe2](https://github.com/caffe2/caffe2) deep learning framework. At FAIR, Detectron has enabled numerous research projects, including: [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144), [Mask R-CNN](https://arxiv.org/abs/1703.06870), [Detecting and Recognizing Human-Object Interactions](https://arxiv.org/abs/1704.07333), [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002), [Non-local Neural Networks](https://arxiv.org/abs/1711.07971), [Learning to Segment Every Thing](https://arxiv.org/abs/1711.10370), [Data Distillation: Towards Omni-Supervised Learning](https://arxiv.org/abs/1712.04440), [DensePose: Dense Human Pose Estimation In The Wild](https://arxiv.org/abs/1802.00434), and [Group Normalization](https://arxiv.org/abs/1803.08494).

Example Mask R-CNN output.

## Introduction The goal of Detectron is to provide a high-quality, high-performance codebase for object detection *research*. It is designed to be flexible in order to support rapid implementation and evaluation of novel research. Detectron includes implementations of the following object detection algorithms: - [Mask R-CNN](https://arxiv.org/abs/1703.06870) -- *Marr Prize at ICCV 2017* - [RetinaNet](https://arxiv.org/abs/1708.02002) -- *Best Student Paper Award at ICCV 2017* - [Faster R-CNN](https://arxiv.org/abs/1506.01497) - [RPN](https://arxiv.org/abs/1506.01497) - [Fast R-CNN](https://arxiv.org/abs/1504.08083) - [R-FCN](https://arxiv.org/abs/1605.06409) using the following backbone network architectures: - [ResNeXt{50,101,152}](https://arxiv.org/abs/1611.05431) - [ResNet{50,101,152}](https://arxiv.org/abs/1512.03385) - [Feature Pyramid Networks](https://arxiv.org/abs/1612.03144) (with ResNet/ResNeXt) - [VGG16](https://arxiv.org/abs/1409.1556) Additional backbone architectures may be easily implemented. For more details about these models, please see [References](#references) below. ## Update - 4/2018: Support Group Normalization - see [`GN/README.md`](./projects/GN/README.md) ## License Detectron is released under the [Apache 2.0 license](https://github.com/facebookresearch/detectron/blob/master/LICENSE). See the [NOTICE](https://github.com/facebookresearch/detectron/blob/master/NOTICE) file for additional details. ## Citing Detectron If you use Detectron in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry. 
``` @misc{Detectron2018, author = {Ross Girshick and Ilija Radosavovic and Georgia Gkioxari and Piotr Doll\'{a}r and Kaiming He}, title = {Detectron}, howpublished = {\url{https://github.com/facebookresearch/detectron}}, year = {2018} } ``` ## Model Zoo and Baselines We provide a large set of baseline results and trained models available for download in the [Detectron Model Zoo](MODEL_ZOO.md). ## Installation Please find installation instructions for Caffe2 and Detectron in [`INSTALL.md`](INSTALL.md). ## Quick Start: Using Detectron After installation, please see [`GETTING_STARTED.md`](GETTING_STARTED.md) for brief tutorials covering inference and training with Detectron. ## Getting Help To start, please check the [troubleshooting](INSTALL.md#troubleshooting) section of our installation instructions as well as our [FAQ](FAQ.md). If you couldn't find help there, try searching our GitHub issues. We intend the issues page to be a forum in which the community collectively troubleshoots problems. If bugs are found, **we appreciate pull requests** (including adding Q&A's to `FAQ.md` and improving our installation instructions and troubleshooting documents). Please see [CONTRIBUTING.md](CONTRIBUTING.md) for more information about contributing to Detectron. ## References - [Data Distillation: Towards Omni-Supervised Learning](https://arxiv.org/abs/1712.04440). Ilija Radosavovic, Piotr Dollár, Ross Girshick, Georgia Gkioxari, and Kaiming He. Tech report, arXiv, Dec. 2017. - [Learning to Segment Every Thing](https://arxiv.org/abs/1711.10370). Ronghang Hu, Piotr Dollár, Kaiming He, Trevor Darrell, and Ross Girshick. Tech report, arXiv, Nov. 2017. - [Non-Local Neural Networks](https://arxiv.org/abs/1711.07971). Xiaolong Wang, Ross Girshick, Abhinav Gupta, and Kaiming He. Tech report, arXiv, Nov. 2017. - [Mask R-CNN](https://arxiv.org/abs/1703.06870). Kaiming He, Georgia Gkioxari, Piotr Dollár, and Ross Girshick. IEEE International Conference on Computer Vision (ICCV), 2017. 
- [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002). Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, and Piotr Dollár. IEEE International Conference on Computer Vision (ICCV), 2017. - [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677). Priya Goyal, Piotr Dollár, Ross Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, and Kaiming He. Tech report, arXiv, June 2017. - [Detecting and Recognizing Human-Object Interactions](https://arxiv.org/abs/1704.07333). Georgia Gkioxari, Ross Girshick, Piotr Dollár, and Kaiming He. Tech report, arXiv, Apr. 2017. - [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144). Tsung-Yi Lin, Piotr Dollár, Ross Girshick, Kaiming He, Bharath Hariharan, and Serge Belongie. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017. - [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431). Saining Xie, Ross Girshick, Piotr Dollár, Zhuowen Tu, and Kaiming He. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017. - [R-FCN: Object Detection via Region-based Fully Convolutional Networks](http://arxiv.org/abs/1605.06409). Jifeng Dai, Yi Li, Kaiming He, and Jian Sun. Conference on Neural Information Processing Systems (NIPS), 2016. - [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385). Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016. - [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](http://arxiv.org/abs/1506.01497) Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. Conference on Neural Information Processing Systems (NIPS), 2015. - [Fast R-CNN](http://arxiv.org/abs/1504.08083). Ross Girshick. IEEE International Conference on Computer Vision (ICCV), 2015. 
================================================ FILE: cmake/Summary.cmake ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## # Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake # Prints configuration summary. function (detectron_print_config_summary) message(STATUS "Summary:") message(STATUS " CMake version : ${CMAKE_VERSION}") message(STATUS " CMake command : ${CMAKE_COMMAND}") message(STATUS " System name : ${CMAKE_SYSTEM_NAME}") message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") message(STATUS " Caffe2 version : ${CAFFE2_VERSION}") message(STATUS " Caffe2 include path : ${CAFFE2_INCLUDE_DIRS}") if (CAFFE2_USE_CUDA OR CAFFE2_FOUND_CUDA) message(STATUS " Caffe2 found CUDA : True") message(STATUS " CUDA version : ${CUDA_VERSION}") message(STATUS " CuDNN version : ${CUDNN_VERSION}") else() message(STATUS " Caffe2 found CUDA : False") endif() endfunction() ================================================ FILE: cmake/legacy/Cuda.cmake ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## # Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Cuda.cmake # Caffe2 cmake utility to prepare for cuda build. # This cmake file is called from Dependencies.cmake. You do not need to # manually invoke it. # Known NVIDIA GPU architectures Caffe2 can be compiled for. # Default is set to cuda 9. If we detect the cuda version to be less than # 9, we will lower it to the corresponding known archs. set(Caffe2_known_gpu_archs "30 35 50 52 60 61 70") # for CUDA 9.x set(Caffe2_known_gpu_archs8 "20 21(20) 30 35 50 52 60 61") # for CUDA 8.x set(Caffe2_known_gpu_archs7 "20 21(20) 30 35 50 52") # for CUDA 7.x ################################################################################################ # Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME # Usage: # caffe2_select_nvcc_arch_flags(out_variable) function(caffe2_select_nvcc_arch_flags out_variable) # List of arch names set(__archs_names "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual") set(__archs_name_default "All") # Set CUDA_ARCH_NAME strings (so it will be seen as a drop-down list in the CMake GUI) set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU architecture") set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names}) mark_as_advanced(CUDA_ARCH_NAME) # Verify CUDA_ARCH_NAME value if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};") string(REPLACE ";" ", " __archs_names "${__archs_names}") message(FATAL_ERROR "Invalid CUDA_ARCH_NAME, supported values: 
${__archs_names}. Got ${CUDA_ARCH_NAME}") endif() if(${CUDA_ARCH_NAME} STREQUAL "Manual") set(CUDA_ARCH_BIN "" CACHE STRING "Specify GPU architectures to build binaries for (BIN(PTX) format is supported)") set(CUDA_ARCH_PTX "" CACHE STRING "Specify GPU architectures to build PTX intermediate code for") mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) else() unset(CUDA_ARCH_BIN CACHE) unset(CUDA_ARCH_PTX CACHE) endif() if(${CUDA_ARCH_NAME} STREQUAL "Kepler") set(__cuda_arch_bin "30 35") elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") set(__cuda_arch_bin "50") elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") set(__cuda_arch_bin "60 61") elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") set(__cuda_arch_bin "70") elseif(${CUDA_ARCH_NAME} STREQUAL "All") set(__cuda_arch_bin ${Caffe2_known_gpu_archs}) elseif(${CUDA_ARCH_NAME} STREQUAL "Manual") set(__cuda_arch_bin ${CUDA_ARCH_BIN}) set(__cuda_arch_ptx ${CUDA_ARCH_PTX}) else() message(FATAL_ERROR "Invalid CUDA_ARCH_NAME") endif() # Remove dots and convert to lists string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}") string(REGEX REPLACE "\\." 
"" __cuda_arch_ptx "${__cuda_arch_ptx}") string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}") string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}") list(REMOVE_DUPLICATES __cuda_arch_bin) list(REMOVE_DUPLICATES __cuda_arch_ptx) set(__nvcc_flags "") set(__nvcc_archs_readable "") # Tell NVCC to add binaries for the specified GPUs foreach(__arch ${__cuda_arch_bin}) if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)") # User explicitly specified PTX for the concrete BIN list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1}) else() # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch}) list(APPEND __nvcc_archs_readable sm_${__arch}) endif() endforeach() # Tell NVCC to add PTX intermediate code for the specified architectures foreach(__arch ${__cuda_arch_ptx}) list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch}) list(APPEND __nvcc_archs_readable compute_${__arch}) endforeach() string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}") set(${out_variable} ${__nvcc_flags} PARENT_SCOPE) set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE) endfunction() ################################################################################################ # Short command for cuda compilation # Usage: # caffe_cuda_compile( ) macro(caffe2_cuda_compile objlist_variable) foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) set(${var}_backup_in_cuda_compile_ "${${var}}") # we remove /EHa as it generates warnings under windows string(REPLACE "/EHa" "" ${var} "${${var}}") endforeach() if(APPLE) list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function) endif() cuda_compile(cuda_objcs ${ARGN}) foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) set(${var} 
"${${var}_backup_in_cuda_compile_}") unset(${var}_backup_in_cuda_compile_) endforeach() set(${objlist_variable} ${cuda_objcs}) endmacro() ################################################################################################ ### Non macro section ################################################################################################ # Special care for windows platform: we know that 32-bit windows does not support cuda. if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") if(NOT (CMAKE_SIZEOF_VOID_P EQUAL 8)) message(FATAL_ERROR "CUDA support not available with 32-bit windows. Did you " "forget to set Win64 in the generator target?") return() endif() endif() find_package(CUDA 7.0 QUIET) find_cuda_helper_libs(curand) # cmake 2.8.7 compatibility which doesn't search for curand if(NOT CUDA_FOUND) set(HAVE_CUDA FALSE) return() endif() set(HAVE_CUDA TRUE) message(STATUS "CUDA detected: " ${CUDA_VERSION}) if (${CUDA_VERSION} LESS 7.0) message(FATAL_ERROR "Caffe2 requires CUDA 7.0 or later version") elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x set(Caffe2_known_gpu_archs ${Caffe2_known_gpu_archs7}) list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x set(Caffe2_known_gpu_archs ${Caffe2_known_gpu_archs8}) list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") # CUDA 8 may complain that sm_20 is no longer supported. Suppress the # warning for now. list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets") endif() caffe2_include_directories(${CUDA_INCLUDE_DIRS}) list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_CUDART_LIBRARY} ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) # find libcuda.so and libnvrtc.so # For libcuda.so, we will find it under lib, lib64, and then the # stubs folder, in case we are building on a system that does not # have cuda driver installed. On windows, we also search under the # folder lib/x64. 
find_library(CUDA_CUDA_LIB cuda PATHS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib64 lib/stubs lib64/stubs lib/x64) find_library(CUDA_NVRTC_LIB nvrtc PATHS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib64 lib/x64) # setting nvcc arch flags caffe2_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}") if(CUDA_CUDA_LIB) message(STATUS "Found libcuda: ${CUDA_CUDA_LIB}") list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_CUDA_LIB}) else() message(FATAL_ERROR "Cannot find libcuda.so. Please file an issue on https://github.com/caffe2/caffe2 with your build output.") endif() if(CUDA_NVRTC_LIB) message(STATUS "Found libnvrtc: ${CUDA_NVRTC_LIB}") list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_NVRTC_LIB}) else() message(FATAL_ERROR "Cannot find libnvrtc.so. Please file an issue on https://github.com/caffe2/caffe2 with your build output.") endif() # disable some nvcc diagnostics that appear in boost, glog, gflags, opencv, etc. foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used) list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag}) endforeach() # Set C++14 support set(CUDA_PROPAGATE_HOST_FLAGS OFF) if (NOT MSVC) list(APPEND CUDA_NVCC_FLAGS "-std=c++14") list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC") endif() # Debug and Release symbol support if (MSVC) if (${CMAKE_BUILD_TYPE} MATCHES "Release") if (${BUILD_SHARED_LIBS}) list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MD") else() list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MT") endif() elseif(${CMAKE_BUILD_TYPE} MATCHES "Debug") message(FATAL_ERROR "Caffe2 currently does not support the combination of MSVC, Cuda " "and Debug mode. 
Either set USE_CUDA=OFF or set the build type " "to Release") if (${BUILD_SHARED_LIBS}) list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MDd") else() list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MTd") endif() else() message(FATAL_ERROR "Unknown cmake build type: " ${CMAKE_BUILD_TYPE}) endif() endif() if(OpenMP_FOUND) list(APPEND CUDA_NVCC_FLAGS "-Xcompiler ${OpenMP_CXX_FLAGS}") endif() # Set :expt-relaxed-constexpr to suppress Eigen warnings list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr") mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) ================================================ FILE: cmake/legacy/Dependencies.cmake ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## # Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Dependencies.cmake # Find CUDA. include(cmake/legacy/Cuda.cmake) if (HAVE_CUDA) # CUDA 9.x requires GCC version <= 6 if ((CUDA_VERSION VERSION_EQUAL 9.0) OR (CUDA_VERSION VERSION_GREATER 9.0 AND CUDA_VERSION VERSION_LESS 10.0)) if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 7.0 AND CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER) message(FATAL_ERROR "CUDA ${CUDA_VERSION} is not compatible with GCC version >= 7. 
" "Use the following option to use another version (for example): \n" " -DCUDA_HOST_COMPILER=/usr/bin/gcc-6\n") endif() # CUDA 8.0 requires GCC version <= 5 elseif (CUDA_VERSION VERSION_EQUAL 8.0) if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 6.0 AND CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER) message(FATAL_ERROR "CUDA 8.0 is not compatible with GCC version >= 6. " "Use the following option to use another version (for example): \n" " -DCUDA_HOST_COMPILER=/usr/bin/gcc-5\n") endif() endif() endif() # Find CUDNN. if (HAVE_CUDA) find_package(CuDNN REQUIRED) if (CUDNN_FOUND) caffe2_include_directories(${CUDNN_INCLUDE_DIRS}) endif() endif() ================================================ FILE: cmake/legacy/Modules/FindCuDNN.cmake ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## # Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Modules/FindCuDNN.cmake # - Try to find cuDNN # # The following variables are optionally searched for defaults # CUDNN_ROOT_DIR: Base directory where all cuDNN components are found # # The following are set after configuration is done: # CUDNN_FOUND # CUDNN_INCLUDE_DIRS # CUDNN_LIBRARIES # CUDNN_LIBRARY_DIRS include(FindPackageHandleStandardArgs) set(CUDNN_ROOT_DIR "" CACHE PATH "Folder contains NVIDIA cuDNN") find_path(CUDNN_INCLUDE_DIR cudnn.h HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES cuda/include include) find_library(CUDNN_LIBRARY cudnn HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64) find_package_handle_standard_args( CUDNN DEFAULT_MSG CUDNN_INCLUDE_DIR CUDNN_LIBRARY) if(CUDNN_FOUND) # get cuDNN version file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS) string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}") string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}") string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}") # Assemble cuDNN version if(NOT CUDNN_VERSION_MAJOR) set(CUDNN_VERSION "?") else() set(CUDNN_VERSION "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}") endif() set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) message(STATUS "Found cuDNN: v${CUDNN_VERSION} (include: ${CUDNN_INCLUDE_DIR}, library: ${CUDNN_LIBRARY})") 
mark_as_advanced(CUDNN_ROOT_DIR CUDNN_LIBRARY CUDNN_INCLUDE_DIR) endif() ================================================ FILE: cmake/legacy/Summary.cmake ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## # Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake # Prints configuration summary. function (detectron_print_config_summary) message(STATUS "Summary:") message(STATUS " CMake version : ${CMAKE_VERSION}") message(STATUS " CMake command : ${CMAKE_COMMAND}") message(STATUS " System name : ${CMAKE_SYSTEM_NAME}") message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") message(STATUS " Caffe2 version : ${CAFFE2_VERSION}") message(STATUS " Caffe2 include path : ${CAFFE2_INCLUDE_DIRS}") message(STATUS " Have CUDA : ${HAVE_CUDA}") if (${HAVE_CUDA}) message(STATUS " CUDA version : ${CUDA_VERSION}") message(STATUS " CuDNN version : ${CUDNN_VERSION}") endif() endfunction() ================================================ FILE: cmake/legacy/Utils.cmake ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Utils.cmake

################################################################################################
# Exclude and prepend functionalities

# Usage:
#   exclude(<output_var> "<input_list>" [<item> ...])
# Removes every extra argument from the list received as INPUT and stores the
# remaining elements in <output_var> in the caller's scope.
function (exclude OUTPUT INPUT)
  set(EXCLUDES ${ARGN})
  foreach(EXCLUDE ${EXCLUDES})
    list(REMOVE_ITEM INPUT "${EXCLUDE}")
  endforeach()
  set(${OUTPUT} ${INPUT} PARENT_SCOPE)
endfunction(exclude)

# Usage:
#   prepend(<output_var> <prefix> [<item> ...])
# Prefixes each extra argument with <prefix> and stores the resulting list in
# <output_var> in the caller's scope.
function (prepend OUTPUT PREPEND)
  set(OUT "")
  foreach(ITEM ${ARGN})
    list(APPEND OUT "${PREPEND}${ITEM}")
  endforeach()
  set(${OUTPUT} ${OUT} PARENT_SCOPE)
endfunction(prepend)

################################################################################################
# Clears variables from list
# Usage:
#   caffe_clear_vars(<var1> [<var2> ...])
# This is a macro, so the variables are unset in the scope of the caller.
macro(caffe_clear_vars)
  foreach(_var ${ARGN})
    unset(${_var})
  endforeach()
endmacro()

################################################################################################
# Prints list element per line
# Usage:
#   caffe_print_list(<item1> [<item2> ...])
# Each argument is emitted as its own STATUS message.
function(caffe_print_list)
  foreach(e ${ARGN})
    message(STATUS ${e})
  endforeach()
endfunction()

################################################################################################
# Reads set of version defines from the header file
# Usage:
#   caffe_parse_header(<filename> <file_var> <define1> [<define2> ...] [PARENT_SCOPE | CACHE])
macro(caffe_parse_header FILENAME FILE_VAR)
  # Arguments after FILE_VAR are the names of the numeric #defines to read.
  # The literal arguments PARENT_SCOPE and CACHE are flags, not names: CACHE
  # stores each parsed value as an INTERNAL cache entry, PARENT_SCOPE sets it
  # in the parent scope. The matching header lines are left in ${FILE_VAR}.
  set(vars_regex "")
  set(__parnet_scope OFF)
  set(__add_cache OFF)
  # Build an alternation "NAME1|NAME2|..." from the requested define names.
  foreach(name ${ARGN})
    if("${name}" STREQUAL "PARENT_SCOPE")
      set(__parnet_scope ON)
    elseif("${name}" STREQUAL "CACHE")
      set(__add_cache ON)
    elseif(vars_regex)
      set(vars_regex "${vars_regex}|${name}")
    else()
      set(vars_regex "${name}")
    endif()
  endforeach()
  if(EXISTS "${FILENAME}")
    file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" )
  else()
    unset(${FILE_VAR})
  endif()
  foreach(name ${ARGN})
    if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE")
      if(${FILE_VAR})
        if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*")
          string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}")
        else()
          set(${name} "")
        endif()
        if(__add_cache)
          set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE)
        elseif(__parnet_scope)
          set(${name} "${${name}}" PARENT_SCOPE)
        endif()
      else()
        # Nothing was read from the header: drop any previously cached value.
        unset(${name} CACHE)
      endif()
    endif()
  endforeach()
endmacro()

################################################################################################
# Reads single version define from the header file and parses it
# Usage:
#   caffe_parse_header_single_define(<libname> <hdr_path> <varname>)
#
# <varname> must be defined in <hdr_path> as a quoted string such as "1.2.3" or
# "1.2.3.4". On success the following variables are set in the caller's scope:
#   <libname>_VERSION_MAJOR, <libname>_VERSION_MINOR, <libname>_VERSION_PATCH,
#   <libname>_VERSION_TWEAK (empty when the define has no fourth component) and
#   <libname>_VERSION_STRING.
# Extra arguments are forwarded to every set() call, as before.
# Nothing is set when the header does not exist or the define is not found.
function(caffe_parse_header_single_define LIBNAME HDR_PATH VARNAME)
  set(${LIBNAME}_H "")
  if(EXISTS "${HDR_PATH}")
    file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1)
  endif()

  if(${LIBNAME}_H)
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}")
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${${LIBNAME}_H}")
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}")

    # Assemble the version string in function-local variables first. A value
    # written with PARENT_SCOPE is not readable from this scope, so building
    # the string from the exported variables would silently yield an empty
    # (or stale) result and drop the tweak component.
    set(__version_string "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}")

    # append a TWEAK version if it exists:
    set(__version_tweak "")
    if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$")
      set(__version_tweak "${CMAKE_MATCH_1}")
    endif()
    # A tweak of "0" evaluates to false and is intentionally not appended,
    # matching the truthiness test used by the original code.
    if(__version_tweak)
      set(__version_string "${__version_string}.${__version_tweak}")
    endif()

    # Export everything to the caller in one place.
    set(${LIBNAME}_VERSION_MAJOR ${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE)
    set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE)
    set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE)
    set(${LIBNAME}_VERSION_TWEAK "${__version_tweak}" ${ARGN} PARENT_SCOPE)
    set(${LIBNAME}_VERSION_STRING "${__version_string}" ${ARGN} PARENT_SCOPE)
  endif()
endfunction()

########################################################################################################
# An option that the user can select. Can accept condition to control when option is available for user.
# Usage:
#   caffe_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
function(caffe_option variable description value)
  set(__value ${value})
  set(__condition "")
  set(__varname "__value")
  # Arguments before IF extend the value expression; arguments after IF form
  # the availability condition.
  foreach(arg ${ARGN})
    if(arg STREQUAL "IF" OR arg STREQUAL "if")
      set(__varname "__condition")
    else()
      list(APPEND ${__varname} ${arg})
    endif()
  endforeach()
  unset(__varname)
  # No condition given: use an expression that is always true.
  if("${__condition}" STREQUAL "")
    set(__condition 2 GREATER 1)
  endif()

  if(${__condition})
    if("${__value}" MATCHES ";")
      # Multi-token value: evaluate it as a boolean expression.
      if(${__value})
        option(${variable} "${description}" ON)
      else()
        option(${variable} "${description}" OFF)
      endif()
    elseif(DEFINED ${__value})
      # Value names an existing variable: use that variable's truthiness.
      if(${__value})
        option(${variable} "${description}" ON)
      else()
        option(${variable} "${description}" OFF)
      endif()
    else()
      option(${variable} "${description}" ${__value})
    endif()
  else()
    # Condition not met: the option is not offered, so drop it from the cache.
    unset(${variable} CACHE)
  endif()
endfunction()

##############################################################################
# Helper function to add as-needed flag around a library.
function(caffe_add_as_needed_flag lib output_var)
  # Stores in <output_var> (caller's scope) the link items needed to keep
  # <lib> linked even when no symbol of it is referenced directly.
  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    # TODO: Clang seems to not need this flag. Double check.
    set(${output_var} ${lib} PARENT_SCOPE)
  elseif(MSVC)
    # TODO: check what is the behavior of MSVC.
    # In MSVC, we will add whole archive in default.
    set(${output_var} ${lib} PARENT_SCOPE)
  else()
    # Assume everything else is like gcc: we will need as-needed flag.
    set(${output_var} -Wl,--no-as-needed ${lib} -Wl,--as-needed PARENT_SCOPE)
  endif()
endfunction()

##############################################################################
# Helper function to add whole_archive flag around a library.
function(caffe_add_whole_archive_flag lib output_var)
  # Stores in <output_var> (caller's scope) the link items that force every
  # object of <lib> into the final binary.
  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    # -force_load takes a file path, not a target name, so the target is
    # resolved to its output file with a generator expression.
    set(${output_var} -Wl,-force_load,$<TARGET_FILE:${lib}> PARENT_SCOPE)
  elseif(MSVC)
    # In MSVC, we will add whole archive in default.
    set(${output_var} -WHOLEARCHIVE:$<TARGET_FILE:${lib}> PARENT_SCOPE)
  else()
    # Assume everything else is like gcc
    set(${output_var} -Wl,--whole-archive ${lib} -Wl,--no-whole-archive PARENT_SCOPE)
  endif()
endfunction()

##############################################################################
# Helper function to add either as-needed, or whole_archive flag around a library.
function(caffe_add_linker_flag lib output_var)
  # Shared builds use the as-needed wrapper, static builds the whole-archive one.
  if (BUILD_SHARED_LIBS)
    caffe_add_as_needed_flag(${lib} tmp)
  else()
    caffe_add_whole_archive_flag(${lib} tmp)
  endif()
  set(${output_var} ${tmp} PARENT_SCOPE)
endfunction()

##############################################################################
# Helper function to automatically generate __init__.py files where python
# sources reside but there are no __init__.py present.
function(caffe_autogen_init_py_files)
  # Mirrors the directory layout of every caffe2/*.py file found under
  # PROJECT_SOURCE_DIR into CMAKE_BINARY_DIR and writes an empty __init__.py
  # into each mirrored directory that does not have one yet.
  file(GLOB_RECURSE all_python_files RELATIVE ${PROJECT_SOURCE_DIR}
       "${PROJECT_SOURCE_DIR}/caffe2/*.py")
  # Initialise to an empty (but defined) list so that the list() commands
  # below are valid even when the glob finds nothing.
  set(python_paths_need_init_py "")
  foreach(python_file ${all_python_files})
    get_filename_component(python_path "${python_file}" PATH)
    string(REPLACE "/" ";" path_parts "${python_path}")
    # Every intermediate directory needs its own __init__.py, so add each
    # prefix of the path, not just the leaf directory.
    set(rebuilt_path ${CMAKE_BINARY_DIR})
    foreach(path_part ${path_parts})
      set(rebuilt_path "${rebuilt_path}/${path_part}")
      list(APPEND python_paths_need_init_py ${rebuilt_path})
    endforeach()
  endforeach()
  # Since the _pb2.py files are yet to be created, we will need to manually
  # add them to the list.
  list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe)
  list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe/proto)
  list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe2/proto)
  # De-duplicate after the manual additions: the list is then guaranteed to be
  # non-empty (older CMake rejects REMOVE_DUPLICATES on a missing list) and
  # the manually added paths are de-duplicated as well.
  list(REMOVE_DUPLICATES python_paths_need_init_py)

  foreach(tmp ${python_paths_need_init_py})
    if(NOT EXISTS ${tmp}/__init__.py)
      # message(STATUS "Generate " ${tmp}/__init__.py)
      file(WRITE ${tmp}/__init__.py "")
    endif()
  endforeach()
endfunction()

##############################################################################
# Creating a Caffe2 binary target with sources specified with relative path.
# Usage:
#   caffe2_binary_target(target_name_or_src [<src1>] [<src2>] ...)
# If only target_name_or_src is specified, this target is build with one single
# source file and the target name is autogen from the filename. Otherwise, the
# target name is given by the first argument and the rest are the source files
# to build the target.
function(caffe2_binary_target target_name_or_src)
  # With extra arguments, the first argument is the target name and the rest
  # are its sources; with a single argument, that argument is the only source
  # and the target is named after it (file name without extension).
  #
  # ARGC is used to detect extra arguments. Expanding ${ARGN} inside if()
  # would evaluate a single source name as an (undefined, hence false)
  # variable and would be a syntax error for two or more sources.
  if (ARGC GREATER 1)
    set(__target ${target_name_or_src})
    prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${ARGN}")
  else()
    get_filename_component(__target ${target_name_or_src} NAME_WE)
    prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${target_name_or_src}")
  endif()
  add_executable(${__target} ${__srcs})
  add_dependencies(${__target} ${Caffe2_MAIN_LIBS_ORDER})
  target_link_libraries(${__target} ${Caffe2_MAIN_LIBS} ${Caffe2_DEPENDENCY_LIBS})
  install(TARGETS ${__target} DESTINATION bin)
endfunction()

##############################################################################
# Helper function to add paths to system include directories.
#
# Anaconda distributions typically contain a lot of packages and some
# of those can conflict with headers/libraries that must be sourced
# from elsewhere. This helper ensures that Anaconda paths are always
# added AFTER other include paths, such that they do not accidentally
# take precedence when they shouldn't.
#
# This is just a heuristic and does not have any guarantees. We can
# add other corner cases here (as long as they are generic enough).
# A complete include path cross checker is a final resort if this
# hacky approach proves insufficient.
#
function(caffe2_include_directories)
  foreach(path IN LISTS ARGN)
    if (${path} MATCHES "/anaconda")
      include_directories(AFTER SYSTEM ${path})
    else()
      include_directories(BEFORE SYSTEM ${path})
    endif()
  endforeach()
endfunction()


================================================
FILE: cmake/legacy/legacymake.cmake
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

# This file contains legacy cmake scripts that are going to be removed
# in a future release.

# Add CMake modules.
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/legacy/Modules)

# Add compiler flags.
# Both variables keep whatever flags were already set and append to them.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC -Wno-narrowing")

# Include Caffe2 CMake utils.
include(cmake/legacy/Utils.cmake)

# Find dependencies.
include(cmake/legacy/Dependencies.cmake)

# Print configuration summary.
# detectron_print_config_summary() is presumably provided by the Summary.cmake
# included on the previous line -- confirm against that file.
include(cmake/legacy/Summary.cmake)
detectron_print_config_summary()

# Collect custom ops sources.
# *.cc files are the CPU sources and *.cu files the GPU sources, both taken
# from detectron/ops under the current source directory.
file(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cc)
file(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cu)

# Install custom CPU ops lib.
# Shared library built from the CPU sources only, linked against caffe2 and
# installed under lib.
add_library(
     caffe2_detectron_custom_ops SHARED
     ${CUSTOM_OPS_CPU_SRCS})

target_include_directories(
    caffe2_detectron_custom_ops PRIVATE
    ${CAFFE2_INCLUDE_DIRS})
target_link_libraries(caffe2_detectron_custom_ops caffe2)
install(TARGETS caffe2_detectron_custom_ops DESTINATION lib)

# Install custom GPU ops lib.
# The GPU custom-ops library is only built and installed when HAVE_CUDA
# expands to a true value.
if (${HAVE_CUDA})
  # Additional -I prefix is required for CMake versions before commit (< 3.7):
  # https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594
  # NOTE(review): if CAFFE2_INCLUDE_DIRS ever holds more than one directory,
  # only the first entry receives the -I prefix here -- confirm it is always a
  # single path.
  list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS})
  # The GPU library is compiled from the CPU (.cc) sources as well as the
  # GPU (.cu) sources.
  CUDA_ADD_LIBRARY(
      caffe2_detectron_custom_ops_gpu SHARED
      ${CUSTOM_OPS_CPU_SRCS}
      ${CUSTOM_OPS_GPU_SRCS})

  target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu)
  install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib)
endif()

================================================ FILE: configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True USE_GN: True # Note: use GN on the FPN-specific layers RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform STEM_FUNC: basic_gn_stem # Note: this is a GN stem SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl # Note: a GN pre-trained model DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES:
(800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . ================================================ FILE: configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 270000 STEPS: [0, 210000, 250000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True USE_GN: True # Note: use GN on the FPN-specific layers RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform STEM_FUNC: basic_gn_stem # Note: this is a GN stem SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl # Note: a GN pre-trained model DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True USE_GN: True # Note: use GN on the FPN-specific layers RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform STEM_FUNC: basic_gn_stem # Note: this is a GN stem SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl # Note: a GN pre-trained model DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 270000 STEPS: [0, 210000, 250000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True USE_GN: True # Note: use GN on the FPN-specific layers RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform STEM_FUNC: basic_gn_stem # Note: this is a GN stem SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl # Note: a GN pre-trained model DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/04_2018_gn_baselines/mask_rcnn_R-50-FPN_1x_gn.yaml ================================================ # WARNING: this script uses **pre-computed** BN-based proposals, and is for quick debugging only. MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True USE_GN: True # Note: use GN on the FPN-specific layers RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform STEM_FUNC: basic_gn_stem # Note: this is a GN stem SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl # Note: a GN pre-trained model DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 
BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . ================================================ FILE: configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 270000 STEPS: [0, 210000, 250000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True USE_GN: True # Note: use GN on the FPN-specific layers RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform STEM_FUNC: basic_gn_stem # Note: this is a GN stem SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: # WEIGHTS: N/A FREEZE_AT: 0 DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 270000 STEPS: [0, 210000, 250000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True USE_GN: True # Note: use GN on the FPN-specific layers RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform STEM_FUNC: basic_gn_stem # Note: this is a GN stem SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: # WEIGHTS: N/A FREEZE_AT: 0 DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) MAX_ITER: 180000 STEPS: [0, 120000, 160000] RPN: SIZES: (32, 64, 128, 256, 512) FAST_RCNN: ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head ROI_XFORM_METHOD: RoIAlign TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 6000 RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) MAX_ITER: 360000 STEPS: [0, 240000, 320000] RPN: SIZES: (32, 64, 128, 256, 512) FAST_RCNN: ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head ROI_XFORM_METHOD: RoIAlign TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 6000 RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 360000 STEPS: [0, 240000, 320000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 360000 STEPS: [0, 240000, 320000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 FASTER_RCNN: True KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('keypoints_coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 FASTER_RCNN: True KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 130000 STEPS: [0, 100000, 120000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('keypoints_coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 2 FASTER_RCNN: True KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('keypoints_coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 2 FASTER_RCNN: True KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 130000 STEPS: [0, 100000, 120000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('keypoints_coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 FASTER_RCNN: True KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('keypoints_coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 FASTER_RCNN: True KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 130000 STEPS: [0, 100000, 120000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (factor contributed by USE_DECONV_OUTPUT) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('keypoints_coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 FASTER_RCNN: True KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (factor contributed by USE_DECONV_OUTPUT) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('keypoints_coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 FASTER_RCNN: True KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 130000 STEPS: [0, 100000, 120000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * 2 (factor contributed by USE_DECONV_OUTPUT) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('keypoints_coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) MAX_ITER: 180000 STEPS: [0, 120000, 160000] RPN: SIZES: (32, 64, 128, 256, 512) FAST_RCNN: ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head ROI_XFORM_METHOD: RoIAlign MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare RESOLUTION: 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default: GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 6000 RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) MAX_ITER: 360000 STEPS: [0, 240000, 320000] RPN: SIZES: (32, 64, 128, 256, 512) FAST_RCNN: ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head ROI_XFORM_METHOD: RoIAlign MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare RESOLUTION: 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default: GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 6000 RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 360000 STEPS: [0, 240000, 320000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 360000 STEPS: [0, 240000, 320000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet152_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 260000 STEPS: [0, 200000, 240000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (640, 672, 704, 736, 768, 800) # Scale jitter MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 BBOX_VOTE: ENABLED: True VOTE_TH: 0.9 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 BBOX_AUG: ENABLED: True SCORE_HEUR: UNION COORD_HEUR: UNION H_FLIP: True SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) MAX_SIZE: 2000 SCALE_H_FLIP: True SCALE_SIZE_DEP: False ASPECT_RATIOS: () ASPECT_RATIO_H_FLIP: False MASK_AUG: ENABLED: True HEUR: SOFT_AVG H_FLIP: True SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) MAX_SIZE: 2000 SCALE_H_FLIP: True SCALE_SIZE_DEP: 
False ASPECT_RATIOS: () ASPECT_RATIO_H_FLIP: False OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) MAX_ITER: 180000 STEPS: [0, 120000, 160000] RPN: SIZES: (32, 64, 128, 256, 512) FAST_RCNN: ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head ROI_XFORM_METHOD: RoIAlign TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) MAX_ITER: 360000 STEPS: [0, 240000, 320000] RPN: SIZES: (32, 64, 128, 256, 512) FAST_RCNN: ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head ROI_XFORM_METHOD: RoIAlign TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 360000 STEPS: [0, 240000, 320000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 360000 STEPS: [0, 240000, 320000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 130000 STEPS: [0, 100000, 120000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 130000 STEPS: [0, 100000, 120000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 130000 STEPS: [0, 100000, 120000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 
'https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('keypoints_coco_2014_train', 
'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 130000 STEPS: [0, 100000, 120000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: head_builder.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 
ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) MAX_ITER: 180000 STEPS: [0, 120000, 160000] RPN: SIZES: (32, 64, 128, 256, 512) FAST_RCNN: ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head ROI_XFORM_METHOD: RoIAlign MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare RESOLUTION: 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default: GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) MAX_ITER: 360000 STEPS: [0, 240000, 320000] RPN: SIZES: (32, 64, 128, 256, 512) FAST_RCNN: ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head ROI_XFORM_METHOD: RoIAlign MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare RESOLUTION: 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default: GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 
NMS: 0.5 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 360000 STEPS: [0, 240000, 320000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 
SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 360000 STEPS: [0, 240000, 320000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (800,) MAX_SIZE: 1333 
IMS_PER_BATCH: 1 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/retinanet_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: retinanet CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 7 RPN_MIN_LEVEL: 3 COARSEST_STRIDE: 128 EXTRA_CONV_LEVELS: True RETINANET: RETINANET_ON: True NUM_CONVS: 4 ASPECT_RATIOS: (1.0, 2.0, 0.5) SCALES_PER_OCTAVE: 3 ANCHOR_SCALE: 4 LOSS_GAMMA: 2.0 LOSS_ALPHA: 0.25 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 RPN_STRADDLE_THRESH: -1 # default 0 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 10000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/retinanet_R-101-FPN_2x.yaml ================================================ MODEL: TYPE: retinanet CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 7 RPN_MIN_LEVEL: 3 COARSEST_STRIDE: 128 EXTRA_CONV_LEVELS: True RETINANET: RETINANET_ON: True NUM_CONVS: 4 ASPECT_RATIOS: (1.0, 2.0, 0.5) SCALES_PER_OCTAVE: 3 ANCHOR_SCALE: 4 LOSS_GAMMA: 2.0 LOSS_ALPHA: 0.25 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 RPN_STRADDLE_THRESH: -1 # default 0 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 10000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/retinanet_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: retinanet CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 7 RPN_MIN_LEVEL: 3 COARSEST_STRIDE: 128 EXTRA_CONV_LEVELS: True RETINANET: RETINANET_ON: True NUM_CONVS: 4 ASPECT_RATIOS: (1.0, 2.0, 0.5) SCALES_PER_OCTAVE: 3 ANCHOR_SCALE: 4 LOSS_GAMMA: 2.0 LOSS_ALPHA: 0.25 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 RPN_STRADDLE_THRESH: -1 # default 0 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 10000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/retinanet_R-50-FPN_2x.yaml ================================================ MODEL: TYPE: retinanet CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 7 RPN_MIN_LEVEL: 3 COARSEST_STRIDE: 128 EXTRA_CONV_LEVELS: True RETINANET: RETINANET_ON: True NUM_CONVS: 4 ASPECT_RATIOS: (1.0, 2.0, 0.5) SCALES_PER_OCTAVE: 3 ANCHOR_SCALE: 4 LOSS_GAMMA: 2.0 LOSS_ALPHA: 0.25 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 RPN_STRADDLE_THRESH: -1 # default 0 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 10000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: retinanet CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 7 RPN_MIN_LEVEL: 3 COARSEST_STRIDE: 128 EXTRA_CONV_LEVELS: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 RETINANET: RETINANET_ON: True NUM_CONVS: 4 ASPECT_RATIOS: (1.0, 2.0, 0.5) SCALES_PER_OCTAVE: 3 ANCHOR_SCALE: 4 LOSS_GAMMA: 2.0 LOSS_ALPHA: 0.25 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 RPN_STRADDLE_THRESH: -1 # default 0 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 10000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml ================================================ MODEL: TYPE: retinanet CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 7 RPN_MIN_LEVEL: 3 COARSEST_STRIDE: 128 EXTRA_CONV_LEVELS: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 RETINANET: RETINANET_ON: True NUM_CONVS: 4 ASPECT_RATIOS: (1.0, 2.0, 0.5) SCALES_PER_OCTAVE: 3 ANCHOR_SCALE: 4 LOSS_GAMMA: 2.0 LOSS_ALPHA: 0.25 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 RPN_STRADDLE_THRESH: -1 # default 0 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 10000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: retinanet CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 7 RPN_MIN_LEVEL: 3 COARSEST_STRIDE: 128 EXTRA_CONV_LEVELS: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 RETINANET: RETINANET_ON: True NUM_CONVS: 4 ASPECT_RATIOS: (1.0, 2.0, 0.5) SCALES_PER_OCTAVE: 3 ANCHOR_SCALE: 4 LOSS_GAMMA: 2.0 LOSS_ALPHA: 0.25 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 RPN_STRADDLE_THRESH: -1 # default 0 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 10000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml ================================================ MODEL: TYPE: retinanet CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 7 RPN_MIN_LEVEL: 3 COARSEST_STRIDE: 128 EXTRA_CONV_LEVELS: True RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 RETINANET: RETINANET_ON: True NUM_CONVS: 4 ASPECT_RATIOS: (1.0, 2.0, 0.5) SCALES_PER_OCTAVE: 3 ANCHOR_SCALE: 4 LOSS_GAMMA: 2.0 LOSS_ALPHA: 0.25 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 RPN_STRADDLE_THRESH: -1 # default 0 TEST: DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 10000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/rpn_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 6 RPN_MIN_LEVEL: 2 RPN_ANCHOR_START_SIZE: 32 RPN_ASPECT_RATIOS: (0.5, 1, 2) TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') SCALE: 800 MAX_SIZE: 1333 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/rpn_R-50-C4_1x.yaml ================================================ MODEL: TYPE: rpn CONV_BODY: ResNet.add_ResNet50_conv4_body NUM_CLASSES: 81 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] RPN: SIZES: (32, 64, 128, 256, 512) TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') SCALE: 800 MAX_SIZE: 1333 USE_NCCL: False OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/rpn_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 6 RPN_MIN_LEVEL: 2 RPN_ANCHOR_START_SIZE: 32 RPN_ASPECT_RATIOS: (0.5, 1, 2) TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') SCALE: 800 MAX_SIZE: 1333 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 6 RPN_MIN_LEVEL: 2 RPN_ANCHOR_START_SIZE: 32 RPN_ASPECT_RATIOS: (0.5, 1, 2) RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') SCALE: 800 MAX_SIZE: 1333 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 81 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 6 RPN_MIN_LEVEL: 2 RPN_ANCHOR_START_SIZE: 32 RPN_ASPECT_RATIOS: (0.5, 1, 2) RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') SCALE: 800 MAX_SIZE: 1333 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 6 RPN_MIN_LEVEL: 2 RPN_ANCHOR_START_SIZE: 32 RPN_ASPECT_RATIOS: (0.5, 1, 2) TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') SCALE: 800 MAX_SIZE: 1333 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . ================================================ FILE: configs/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 2 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 6 RPN_MIN_LEVEL: 2 RPN_ANCHOR_START_SIZE: 32 RPN_ASPECT_RATIOS: (0.5, 1, 2) TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') SCALE: 800 MAX_SIZE: 1333 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 6 RPN_MIN_LEVEL: 2 RPN_ANCHOR_START_SIZE: 32 RPN_ASPECT_RATIOS: (0.5, 1, 2) RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 32 WIDTH_PER_GROUP: 8 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') SCALE: 800 MAX_SIZE: 1333 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . 
================================================ FILE: configs/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet101_conv5_body NUM_CLASSES: 2 RPN_ONLY: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_RPN: True RPN_MAX_LEVEL: 6 RPN_MIN_LEVEL: 2 RPN_ANCHOR_START_SIZE: 32 RPN_ASPECT_RATIOS: (0.5, 1, 2) RESNETS: STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models TRANS_FUNC: bottleneck_transformation NUM_GROUPS: 64 WIDTH_PER_GROUP: 4 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 TEST: DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') SCALE: 800 MAX_SIZE: 1333 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 2000 OUTPUT_DIR: . ================================================ FILE: configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 1 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.0025 GAMMA: 0.1 MAX_ITER: 60000 STEPS: [0, 30000, 40000] # Equivalent schedules with... 
# 1 GPU: # BASE_LR: 0.0025 # MAX_ITER: 60000 # STEPS: [0, 30000, 40000] # 2 GPUs: # BASE_LR: 0.005 # MAX_ITER: 30000 # STEPS: [0, 15000, 20000] # 4 GPUs: # BASE_LR: 0.01 # MAX_ITER: 15000 # STEPS: [0, 7500, 10000] # 8 GPUs: # BASE_LR: 0.02 # MAX_ITER: 7500 # STEPS: [0, 3750, 5000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train',) SCALES: (500,) MAX_SIZE: 833 BATCH_SIZE_PER_IM: 256 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 500 MAX_SIZE: 833 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . ================================================ FILE: configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 2 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.005 GAMMA: 0.1 MAX_ITER: 30000 STEPS: [0, 15000, 20000] # Equivalent schedules with... 
# 1 GPU: # BASE_LR: 0.0025 # MAX_ITER: 60000 # STEPS: [0, 30000, 40000] # 2 GPUs: # BASE_LR: 0.005 # MAX_ITER: 30000 # STEPS: [0, 15000, 20000] # 4 GPUs: # BASE_LR: 0.01 # MAX_ITER: 15000 # STEPS: [0, 7500, 10000] # 8 GPUs: # BASE_LR: 0.02 # MAX_ITER: 7500 # STEPS: [0, 3750, 5000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train',) SCALES: (500,) MAX_SIZE: 833 BATCH_SIZE_PER_IM: 256 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 500 MAX_SIZE: 833 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . ================================================ FILE: configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 4 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.01 GAMMA: 0.1 MAX_ITER: 15000 STEPS: [0, 7500, 10000] # Equivalent schedules with... 
# 1 GPU: # BASE_LR: 0.0025 # MAX_ITER: 60000 # STEPS: [0, 30000, 40000] # 2 GPUs: # BASE_LR: 0.005 # MAX_ITER: 30000 # STEPS: [0, 15000, 20000] # 4 GPUs: # BASE_LR: 0.01 # MAX_ITER: 15000 # STEPS: [0, 7500, 10000] # 8 GPUs: # BASE_LR: 0.02 # MAX_ITER: 7500 # STEPS: [0, 3750, 5000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train',) SCALES: (500,) MAX_SIZE: 833 BATCH_SIZE_PER_IM: 256 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 500 MAX_SIZE: 833 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . ================================================ FILE: configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml ================================================ MODEL: TYPE: generalized_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 7500 STEPS: [0, 3750, 5000] # Equivalent schedules with... 
# 1 GPU: # BASE_LR: 0.0025 # MAX_ITER: 60000 # STEPS: [0, 30000, 40000] # 2 GPUs: # BASE_LR: 0.005 # MAX_ITER: 30000 # STEPS: [0, 15000, 20000] # 4 GPUs: # BASE_LR: 0.01 # MAX_ITER: 15000 # STEPS: [0, 7500, 10000] # 8 GPUs: # BASE_LR: 0.02 # MAX_ITER: 7500 # STEPS: [0, 3750, 5000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train',) SCALES: (500,) MAX_SIZE: 833 BATCH_SIZE_PER_IM: 256 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: DATASETS: ('coco_2014_minival',) SCALE: 500 MAX_SIZE: 833 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 OUTPUT_DIR: . ================================================ FILE: configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml ================================================ MODEL: TYPE: mask_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 81 FASTER_RCNN: True MASK_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 180000 STEPS: [0, 120000, 160000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 MRCNN: ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs RESOLUTION: 28 # (output mask resolution) default 14 ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 # default 7 ROI_XFORM_SAMPLING_RATIO: 2 # default 0 DILATION: 1 # default 2 CONV_INIT: MSRAFill # default GaussianFill TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') SCALES: (800,) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 RPN_PRE_NMS_TOP_N: 2000 # Per FPN level TEST: 
DATASETS: ('coco_2014_minival',) SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 RPN_PRE_NMS_TOP_N: 1000 # Per FPN level RPN_POST_NMS_TOP_N: 1000 WEIGHTS: https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl # -- Test time augmentation example -- # BBOX_AUG: ENABLED: True SCORE_HEUR: UNION # AVG NOTE: cannot use AVG for e2e model COORD_HEUR: UNION # AVG NOTE: cannot use AVG for e2e model H_FLIP: True SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) MAX_SIZE: 2000 SCALE_H_FLIP: True SCALE_SIZE_DEP: False AREA_TH_LO: 2500 # 50^2 AREA_TH_HI: 32400 # 180^2 ASPECT_RATIOS: () ASPECT_RATIO_H_FLIP: False MASK_AUG: ENABLED: True HEUR: SOFT_AVG H_FLIP: True SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) MAX_SIZE: 2000 SCALE_H_FLIP: True SCALE_SIZE_DEP: False AREA_TH: 32400 # 180^2 ASPECT_RATIOS: () ASPECT_RATIO_H_FLIP: False BBOX_VOTE: ENABLED: True VOTE_TH: 0.9 # -- Test time augmentation example -- # USE_NCCL: False OUTPUT_DIR: . 
================================================ FILE: configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml ================================================ MODEL: TYPE: keypoint_rcnn CONV_BODY: FPN.add_fpn_ResNet50_conv5_body NUM_CLASSES: 2 KEYPOINTS_ON: True NUM_GPUS: 8 SOLVER: WEIGHT_DECAY: 0.0001 LR_POLICY: steps_with_decay BASE_LR: 0.02 GAMMA: 0.1 MAX_ITER: 90000 STEPS: [0, 60000, 80000] FPN: FPN_ON: True MULTILEVEL_ROIS: True MULTILEVEL_RPN: True # accidentally True; disable in the future FAST_RCNN: ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 7 ROI_XFORM_SAMPLING_RATIO: 2 KRCNN: ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX NUM_STACKED_CONVS: 8 NUM_KEYPOINTS: 17 USE_DECONV_OUTPUT: True CONV_INIT: MSRAFill CONV_HEAD_DIM: 512 UP_SCALE: 2 HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) ROI_XFORM_METHOD: RoIAlign ROI_XFORM_RESOLUTION: 14 ROI_XFORM_SAMPLING_RATIO: 2 KEYPOINT_CONFIDENCE: bbox TRAIN: WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') SCALES: (640, 672, 704, 736, 768, 800) MAX_SIZE: 1333 BATCH_SIZE_PER_IM: 512 TEST: DATASETS: ('keypoints_coco_2014_minival',) PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) PROPOSAL_LIMIT: 1000 SCALE: 800 MAX_SIZE: 1333 NMS: 0.5 
WEIGHTS: https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl # -- Test time augmentation example -- # BBOX_AUG: ENABLED: True SCORE_HEUR: AVG COORD_HEUR: AVG H_FLIP: True SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) MAX_SIZE: 2000 SCALE_H_FLIP: True SCALE_SIZE_DEP: False AREA_TH_LO: 2500 # 50^2 AREA_TH_HI: 32400 # 180^2 KPS_AUG: ENABLED: True HEUR: HM_AVG H_FLIP: True SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) MAX_SIZE: 2000 SCALE_H_FLIP: True SCALE_SIZE_DEP: True AREA_TH: 22500 # 150^2 ASPECT_RATIOS: () ASPECT_RATIO_H_FLIP: False # -- Test time augmentation example -- # OUTPUT_DIR: . ================================================ FILE: demo/NOTICE ================================================ The demo images are licensed as United States government work: https://www.usa.gov/government-works The image files were obtained on Jan 13, 2018 from the following URLs. 
16004479832_a748d55f21_k.jpg https://www.flickr.com/photos/archivesnews/16004479832 18124840932_e42b3e377c_k.jpg https://www.flickr.com/photos/usnavy/18124840932 33887522274_eebd074106_k.jpg https://www.flickr.com/photos/usaid_pakistan/33887522274 15673749081_767a7fa63a_k.jpg https://www.flickr.com/photos/usnavy/15673749081 34501842524_3c858b3080_k.jpg https://www.flickr.com/photos/departmentofenergy/34501842524 24274813513_0cfd2ce6d0_k.jpg https://www.flickr.com/photos/dhsgov/24274813513 19064748793_bb942deea1_k.jpg https://www.flickr.com/photos/statephotos/19064748793 33823288584_1d21cf0a26_k.jpg https://www.flickr.com/photos/cbpphotos/33823288584 17790319373_bd19b24cfc_k.jpg https://www.flickr.com/photos/secdef/17790319373 ================================================ FILE: detectron/__init__.py ================================================ ================================================ FILE: detectron/core/__init__.py ================================================ ================================================ FILE: detectron/core/config.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## # # Based on: # -------------------------------------------------------- # Fast R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- """Detectron config system. This file specifies default config options for Detectron. You should not change values in this file. Instead, you should write a config file (in yaml) and use merge_cfg_from_file(yaml_file) to load it and override the default options. Most tools in the tools directory take a --cfg option to specify an override file and an optional list of override (key, value) pairs: - See tools/{train,test}_net.py for example code that uses merge_cfg_from_file - See configs/*/*.yaml for example config files Detectron supports a lot of different model types, each of which has a lot of different options. The result is a HUGE set of configuration options. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from ast import literal_eval from future.utils import iteritems import copy import io import logging import numpy as np import os import os.path as osp import six from detectron.utils.collections import AttrDict from detectron.utils.io import cache_url logger = logging.getLogger(__name__) __C = AttrDict() # Consumers can get config by: # from detectron.core.config import cfg cfg = __C # Random note: avoid using '.ON' as a config key since yaml converts it to True; # prefer 'ENABLED' instead # ---------------------------------------------------------------------------- # # Training options # ---------------------------------------------------------------------------- # __C.TRAIN = AttrDict() # Initialize network with weights from this .pkl file __C.TRAIN.WEIGHTS = '' # Datasets to train on # Available dataset list: detectron.datasets.dataset_catalog.datasets() # If multiple datasets are listed, the model is trained on their union __C.TRAIN.DATASETS = () # Scales to use during training # Each scale is the pixel size of an image's shortest side # If multiple scales are listed, then one is selected uniformly at random for # each training image (i.e., scale jitter data augmentation) __C.TRAIN.SCALES = (600, ) # Max pixel size of the longest side of a scaled input image __C.TRAIN.MAX_SIZE = 1000 # Images *per GPU* in the training minibatch # Total images per minibatch = TRAIN.IMS_PER_BATCH * NUM_GPUS __C.TRAIN.IMS_PER_BATCH = 2 # RoI minibatch size *per image* (number of regions of interest [ROIs]) # Total number of RoIs per training minibatch = # TRAIN.BATCH_SIZE_PER_IM * TRAIN.IMS_PER_BATCH * NUM_GPUS # E.g., a common configuration is: 512 * 2 * 8 = 8192 __C.TRAIN.BATCH_SIZE_PER_IM = 64 # Target fraction of RoI minibatch that is labeled foreground (i.e. 
class > 0) __C.TRAIN.FG_FRACTION = 0.25 # Overlap threshold for an RoI to be considered foreground (if >= FG_THRESH) __C.TRAIN.FG_THRESH = 0.5 # Overlap threshold for an RoI to be considered background (class = 0 if # overlap in [LO, HI)) __C.TRAIN.BG_THRESH_HI = 0.5 __C.TRAIN.BG_THRESH_LO = 0.0 # Use horizontally-flipped images during training? __C.TRAIN.USE_FLIPPED = True # Overlap required between an RoI and a ground-truth box in order for that # (RoI, gt box) pair to be used as a bounding-box regression training example __C.TRAIN.BBOX_THRESH = 0.5 # Snapshot (model checkpoint) period # Divide by NUM_GPUS to determine actual period (e.g., 80000/8 => 10000 iters) # to allow for linear training schedule scaling __C.TRAIN.SNAPSHOT_ITERS = 80000 # Train using these proposals # During training, all proposals specified in the file are used (no limit is # applied) # Proposal files must be in correspondence with the datasets listed in # TRAIN.DATASETS __C.TRAIN.PROPOSAL_FILES = () # Make minibatches from images that have similar aspect ratios (i.e. 
both # tall and thin or both short and wide) # This feature is critical for saving memory (and makes training slightly # faster) __C.TRAIN.ASPECT_GROUPING = True # ---------------------------------------------------------------------------- # # RPN training options # ---------------------------------------------------------------------------- # # Run GenerateProposals on GPU if set to True __C.TRAIN.GENERATE_PROPOSALS_ON_GPU = False # Minimum overlap required between an anchor and ground-truth box for the # (anchor, gt box) pair to be a positive example (IOU >= thresh ==> positive RPN # example) __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 # Maximum overlap allowed between an anchor and ground-truth box for the # (anchor, gt box) pair to be a negative examples (IOU < thresh ==> negative RPN # example) __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 # Target fraction of foreground (positive) examples per RPN minibatch __C.TRAIN.RPN_FG_FRACTION = 0.5 # Total number of RPN examples per image __C.TRAIN.RPN_BATCH_SIZE_PER_IM = 256 # NMS threshold used on RPN proposals (used during end-to-end training with RPN) __C.TRAIN.RPN_NMS_THRESH = 0.7 # Number of top scoring RPN proposals to keep before applying NMS # When FPN is used, this is *per FPN level* (not total) __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 # Number of top scoring RPN proposals to keep after applying NMS # This is the total number of RPN proposals produced (for both FPN and non-FPN # cases) __C.TRAIN.RPN_POST_NMS_TOP_N = 2000 # Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels # Set to -1 or a large value, e.g. 
100000, to disable pruning anchors __C.TRAIN.RPN_STRADDLE_THRESH = 0 # Proposal height and width both need to be greater than RPN_MIN_SIZE # (at orig image scale; not scale used during training or inference) __C.TRAIN.RPN_MIN_SIZE = 0 # Filter proposals that are inside of crowd regions by CROWD_FILTER_THRESH # "Inside" is measured as: proposal-with-crowd intersection area divided by # proposal area __C.TRAIN.CROWD_FILTER_THRESH = 0.7 # Ignore ground-truth objects with area < this threshold __C.TRAIN.GT_MIN_AREA = -1 # Freeze the backbone architecture during training if set to True __C.TRAIN.FREEZE_CONV_BODY = False # Training will resume from the latest snapshot (model checkpoint) found in the # output directory __C.TRAIN.AUTO_RESUME = True # Training will copy TRAIN.WEIGHTS and treat it as a candidate checkpoint __C.TRAIN.COPY_WEIGHTS = False # Add StopGrad at a specified stage so the bottom layers are frozen __C.TRAIN.FREEZE_AT = 2 # ---------------------------------------------------------------------------- # # Data loader options (see detectron/roi_data/loader.py for more info) # ---------------------------------------------------------------------------- # __C.DATA_LOADER = AttrDict() # Number of Python threads to use for the data loader (warning: using too many # threads can cause GIL-based interference with Python Ops leading to *slower* # training; 4 seems to be the sweet spot in our experience) __C.DATA_LOADER.NUM_THREADS = 4 # Size of the shared minibatch queue __C.DATA_LOADER.MINIBATCH_QUEUE_SIZE = 64 # Capacity of the per GPU blobs queue __C.DATA_LOADER.BLOBS_QUEUE_CAPACITY = 8 # ---------------------------------------------------------------------------- # # Inference ('test') options # ---------------------------------------------------------------------------- # __C.TEST = AttrDict() # Initialize network with weights from this .pkl file __C.TEST.WEIGHTS = '' # Datasets to test on # Available dataset list: 
detectron.datasets.dataset_catalog.datasets() # If multiple datasets are listed, testing is performed on each one sequentially __C.TEST.DATASETS = () # Scale to use during testing __C.TEST.SCALE = 600 # Max pixel size of the longest side of a scaled input image __C.TEST.MAX_SIZE = 1000 # Overlap threshold used for non-maximum suppression (suppress boxes with # IoU >= this threshold) __C.TEST.NMS = 0.3 # Apply Fast R-CNN style bounding-box regression if True __C.TEST.BBOX_REG = True # Test using these proposal files (must correspond with TEST.DATASETS) __C.TEST.PROPOSAL_FILES = () # Run GenerateProposals on GPU if set to True __C.TEST.GENERATE_PROPOSALS_ON_GPU = False # Limit on the number of proposals per image used during inference __C.TEST.PROPOSAL_LIMIT = 2000 # NMS threshold used on RPN proposals __C.TEST.RPN_NMS_THRESH = 0.7 # Number of top scoring RPN proposals to keep before applying NMS # When FPN is used, this is *per FPN level* (not total) __C.TEST.RPN_PRE_NMS_TOP_N = 12000 # Number of top scoring RPN proposals to keep after applying NMS # This is the total number of RPN proposals produced (for both FPN and non-FPN # cases) __C.TEST.RPN_POST_NMS_TOP_N = 2000 # Proposal height and width both need to be greater than RPN_MIN_SIZE # (at orig image scale; not scale used during training or inference) __C.TEST.RPN_MIN_SIZE = 0 # Maximum number of detections to return per image (100 is based on the limit # established for the COCO dataset) __C.TEST.DETECTIONS_PER_IM = 100 # Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to # balance obtaining high recall with not having too many low precision # detections that will slow down inference post processing steps (like NMS) __C.TEST.SCORE_THRESH = 0.05 # Save detection results files if True # If false, results files are cleaned up (they can be large) after local # evaluation __C.TEST.COMPETITION_MODE = True # Evaluate detections with the COCO json dataset eval code even if it's not the # 
evaluation code for the dataset (e.g. evaluate PASCAL VOC results using the # COCO API to get COCO style AP on PASCAL VOC) __C.TEST.FORCE_JSON_DATASET_EVAL = False # [Inferred value; do not set directly in a config] # Indicates if precomputed proposals are used at test time # Not set for 1-stage models and 2-stage models with RPN subnetwork enabled __C.TEST.PRECOMPUTED_PROPOSALS = True # Evaluate proposals in class-specific Average Recall (AR). # It means that one first computes AR within each category and then averages # over the categories. It is not biased towards the AR of frequent categories # compared with class-agnostic AR. __C.TEST.CLASS_SPECIFIC_AR = False # ---------------------------------------------------------------------------- # # Test-time augmentations for bounding box detection # See configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml for an example # ---------------------------------------------------------------------------- # __C.TEST.BBOX_AUG = AttrDict() # Enable test-time augmentation for bounding box detection if True __C.TEST.BBOX_AUG.ENABLED = False # Heuristic used to combine predicted box scores # Valid options: ('ID', 'AVG', 'UNION') __C.TEST.BBOX_AUG.SCORE_HEUR = 'UNION' # Heuristic used to combine predicted box coordinates # Valid options: ('ID', 'AVG', 'UNION') __C.TEST.BBOX_AUG.COORD_HEUR = 'UNION' # Horizontal flip at the original scale (id transform) __C.TEST.BBOX_AUG.H_FLIP = False # Each scale is the pixel size of an image's shortest side __C.TEST.BBOX_AUG.SCALES = () # Max pixel size of the longer side __C.TEST.BBOX_AUG.MAX_SIZE = 4000 # Horizontal flip at each scale __C.TEST.BBOX_AUG.SCALE_H_FLIP = False # Apply scaling based on object size __C.TEST.BBOX_AUG.SCALE_SIZE_DEP = False __C.TEST.BBOX_AUG.AREA_TH_LO = 50**2 __C.TEST.BBOX_AUG.AREA_TH_HI = 180**2 # Each aspect ratio is relative to image width __C.TEST.BBOX_AUG.ASPECT_RATIOS = () # Horizontal flip at each aspect ratio __C.TEST.BBOX_AUG.ASPECT_RATIO_H_FLIP = False # 
---------------------------------------------------------------------------- # # Test-time augmentations for mask detection # See configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml for an example # ---------------------------------------------------------------------------- # __C.TEST.MASK_AUG = AttrDict() # Enable test-time augmentation for instance mask detection if True __C.TEST.MASK_AUG.ENABLED = False # Heuristic used to combine mask predictions # SOFT prefix indicates that the computation is performed on soft masks # Valid options: ('SOFT_AVG', 'SOFT_MAX', 'LOGIT_AVG') __C.TEST.MASK_AUG.HEUR = 'SOFT_AVG' # Horizontal flip at the original scale (id transform) __C.TEST.MASK_AUG.H_FLIP = False # Each scale is the pixel size of an image's shortest side __C.TEST.MASK_AUG.SCALES = () # Max pixel size of the longer side __C.TEST.MASK_AUG.MAX_SIZE = 4000 # Horizontal flip at each scale __C.TEST.MASK_AUG.SCALE_H_FLIP = False # Apply scaling based on object size __C.TEST.MASK_AUG.SCALE_SIZE_DEP = False __C.TEST.MASK_AUG.AREA_TH = 180**2 # Each aspect ratio is relative to image width __C.TEST.MASK_AUG.ASPECT_RATIOS = () # Horizontal flip at each aspect ratio __C.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP = False # ---------------------------------------------------------------------------- # # Test-augmentations for keypoints detection # configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml # ---------------------------------------------------------------------------- # __C.TEST.KPS_AUG = AttrDict() # Enable test-time augmentation for keypoint detection if True __C.TEST.KPS_AUG.ENABLED = False # Heuristic used to combine keypoint predictions # Valid options: ('HM_AVG', 'HM_MAX') __C.TEST.KPS_AUG.HEUR = 'HM_AVG' # Horizontal flip at the original scale (id transform) __C.TEST.KPS_AUG.H_FLIP = False # Each scale is the pixel size of an image's shortest side __C.TEST.KPS_AUG.SCALES = () # Max pixel size of the longer side __C.TEST.KPS_AUG.MAX_SIZE = 4000 # Horizontal flip at each 
scale __C.TEST.KPS_AUG.SCALE_H_FLIP = False # Apply scaling based on object size __C.TEST.KPS_AUG.SCALE_SIZE_DEP = False __C.TEST.KPS_AUG.AREA_TH = 180**2 # Each aspect ratio is relative to image width __C.TEST.KPS_AUG.ASPECT_RATIOS = () # Horizontal flip at each aspect ratio __C.TEST.KPS_AUG.ASPECT_RATIO_H_FLIP = False # ---------------------------------------------------------------------------- # # Soft NMS # ---------------------------------------------------------------------------- # __C.TEST.SOFT_NMS = AttrDict() # Use soft NMS instead of standard NMS if set to True __C.TEST.SOFT_NMS.ENABLED = False # See soft NMS paper for definition of these options __C.TEST.SOFT_NMS.METHOD = 'linear' __C.TEST.SOFT_NMS.SIGMA = 0.5 # For the soft NMS overlap threshold, we simply use TEST.NMS # ---------------------------------------------------------------------------- # # Bounding box voting (from the Multi-Region CNN paper) # ---------------------------------------------------------------------------- # __C.TEST.BBOX_VOTE = AttrDict() # Use box voting if set to True __C.TEST.BBOX_VOTE.ENABLED = False # We use TEST.NMS threshold for the NMS step. VOTE_TH overlap threshold # is used to select voting boxes (IoU >= VOTE_TH) for each box that survives NMS __C.TEST.BBOX_VOTE.VOTE_TH = 0.8 # The method used to combine scores when doing bounding box voting # Valid options include ('ID', 'AVG', 'IOU_AVG', 'GENERALIZED_AVG', 'QUASI_SUM') __C.TEST.BBOX_VOTE.SCORING_METHOD = 'ID' # Hyperparameter used by the scoring method (it has different meanings for # different methods) __C.TEST.BBOX_VOTE.SCORING_METHOD_BETA = 1.0 # ---------------------------------------------------------------------------- # # Model options # ---------------------------------------------------------------------------- # __C.MODEL = AttrDict() # The type of model to use # The string must match a function in the modeling.model_builder module # (e.g., 'generalized_rcnn', 'mask_rcnn', ...) 
__C.MODEL.TYPE = '' # The backbone conv body to use # The string must match a function that is imported in modeling.model_builder # (e.g., 'FPN.add_fpn_ResNet101_conv5_body' to specify a ResNet-101-FPN # backbone) __C.MODEL.CONV_BODY = '' # Number of classes in the dataset; must be set # E.g., 81 for COCO (80 foreground + 1 background) __C.MODEL.NUM_CLASSES = -1 # Use a class agnostic bounding box regressor instead of the default per-class # regressor __C.MODEL.CLS_AGNOSTIC_BBOX_REG = False # Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets # These are empirically chosen to approximately lead to unit variance targets __C.MODEL.BBOX_REG_WEIGHTS = (10., 10., 5., 5.) # The meaning of FASTER_RCNN depends on the context (training vs. inference): # 1) During training, FASTER_RCNN = True means that end-to-end training will be # used to jointly train the RPN subnetwork and the Fast R-CNN subnetwork # (Faster R-CNN = RPN + Fast R-CNN). # 2) During inference, FASTER_RCNN = True means that the model's RPN subnetwork # will be used to generate proposals rather than relying on precomputed # proposals. Note that FASTER_RCNN = True can be used at inference time even # if the Faster R-CNN model was trained with stagewise training (which # consists of alternating between RPN and Fast R-CNN training in a way that # finally leads to a single network). 
__C.MODEL.FASTER_RCNN = False # Indicates the model makes instance mask predictions (as in Mask R-CNN) __C.MODEL.MASK_ON = False # Indicates the model makes keypoint predictions (as in Mask R-CNN for # keypoints) __C.MODEL.KEYPOINTS_ON = False # Indicates the model's computation terminates with the production of RPN # proposals (i.e., it outputs proposals ONLY, no actual object detections) __C.MODEL.RPN_ONLY = False # Caffe2 net execution type # Use 'prof_dag' to get profiling statistics __C.MODEL.EXECUTION_TYPE = 'dag' # ---------------------------------------------------------------------------- # # RetinaNet options # ---------------------------------------------------------------------------- # __C.RETINANET = AttrDict() # RetinaNet is used (instead of Fast/er/Mask R-CNN/R-FCN/RPN) if True __C.RETINANET.RETINANET_ON = False # Anchor aspect ratios to use __C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0) # Anchor scales per octave __C.RETINANET.SCALES_PER_OCTAVE = 3 # At each FPN level, we generate anchors based on their scale, aspect_ratio, # stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE __C.RETINANET.ANCHOR_SCALE = 4 # Convolutions to use in the cls and bbox tower # NOTE: this doesn't include the last conv for logits __C.RETINANET.NUM_CONVS = 4 # Weight for bbox_regression loss __C.RETINANET.BBOX_REG_WEIGHT = 1.0 # Smooth L1 loss beta for bbox regression __C.RETINANET.BBOX_REG_BETA = 0.11 # During inference, #locs to select based on cls score before NMS is performed # per FPN level __C.RETINANET.PRE_NMS_TOP_N = 1000 # IoU overlap ratio for labeling an anchor as positive # Anchors with >= iou overlap are labeled positive __C.RETINANET.POSITIVE_OVERLAP = 0.5 # IoU overlap ratio for labeling an anchor as negative # Anchors with < iou overlap are labeled negative __C.RETINANET.NEGATIVE_OVERLAP = 0.4 # Focal loss parameter: alpha __C.RETINANET.LOSS_ALPHA = 0.25 # Focal loss parameter: gamma __C.RETINANET.LOSS_GAMMA = 2.0 # Prior prob for the 
positives at the beginning of training. This is used to set # the bias init for the logits layer __C.RETINANET.PRIOR_PROB = 0.01 # Whether classification and bbox branch tower should be shared or not __C.RETINANET.SHARE_CLS_BBOX_TOWER = False # Use class specific bounding box regression instead of the default class # agnostic regression __C.RETINANET.CLASS_SPECIFIC_BBOX = False # Whether softmax should be used in classification branch training __C.RETINANET.SOFTMAX = False # Inference cls score threshold, anchors with score > INFERENCE_TH are # considered for inference __C.RETINANET.INFERENCE_TH = 0.05 # ---------------------------------------------------------------------------- # # Solver options # Note: all solver options are used exactly as specified; the implication is # that if you switch from training on 1 GPU to N GPUs, you MUST adjust the # solver configuration accordingly. We suggest using gradual warmup and the # linear learning rate scaling rule as described in # "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" Goyal et al. # https://arxiv.org/abs/1706.02677 # ---------------------------------------------------------------------------- # __C.SOLVER = AttrDict() # Base learning rate for the specified schedule __C.SOLVER.BASE_LR = 0.001 # Schedule type (see functions in utils.lr_policy for options) # E.g., 'step', 'steps_with_decay', ... 
__C.SOLVER.LR_POLICY = 'step' # Some LR Policies (by example): # 'step' # lr = SOLVER.BASE_LR * SOLVER.GAMMA ** (cur_iter // SOLVER.STEP_SIZE) # 'steps_with_decay' # SOLVER.STEPS = [0, 60000, 80000] # SOLVER.GAMMA = 0.1 # lr = SOLVER.BASE_LR * SOLVER.GAMMA ** current_step # iters [0, 59999] are in current_step = 0, iters [60000, 79999] are in # current_step = 1, and so on # 'steps_with_lrs' # SOLVER.STEPS = [0, 60000, 80000] # SOLVER.LRS = [0.02, 0.002, 0.0002] # lr = LRS[current_step] # 'cosine_decay' # lr = SOLVER.BASE_LR * (cos(PI * cur_iter / SOLVER.MAX_ITER) * 0.5 + 0.5) # 'exp_decay' # lr smoothly decays from SOLVER.BASE_LR to SOLVER.GAMMA * SOLVER.BASE_LR # lr = SOLVER.BASE_LR * exp(np.log(SOLVER.GAMMA) * cur_iter / SOLVER.MAX_ITER) # Hyperparameter used by the specified policy # For 'step', the current LR is multiplied by SOLVER.GAMMA at each step # For 'exp_decay', SOLVER.GAMMA is the ratio between the final and initial LR. __C.SOLVER.GAMMA = 0.1 # Uniform step size for 'step' policy __C.SOLVER.STEP_SIZE = 30000 # Non-uniform step iterations for 'steps_with_decay' or 'steps_with_lrs' # policies __C.SOLVER.STEPS = [] # Learning rates to use with 'steps_with_lrs' policy __C.SOLVER.LRS = [] # Maximum number of SGD iterations __C.SOLVER.MAX_ITER = 40000 # Momentum to use with SGD __C.SOLVER.MOMENTUM = 0.9 # L2 regularization hyperparameter __C.SOLVER.WEIGHT_DECAY = 0.0005 # L2 regularization hyperparameter for GroupNorm's parameters __C.SOLVER.WEIGHT_DECAY_GN = 0.0 # Warm up to SOLVER.BASE_LR over this number of SGD iterations __C.SOLVER.WARM_UP_ITERS = 500 # Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR __C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0 # WARM_UP_METHOD can be either 'constant' or 'linear' (i.e., gradual) __C.SOLVER.WARM_UP_METHOD = 'linear' # Scale the momentum update history by new_lr / old_lr when updating the # learning rate (this is correct given MomentumSGDUpdateOp) __C.SOLVER.SCALE_MOMENTUM = True # Only apply the correction if the 
relative LR change exceeds this threshold # (prevents every change in linear warm up from scaling the momentum by a tiny # amount; momentum scaling is only important if the LR change is large) __C.SOLVER.SCALE_MOMENTUM_THRESHOLD = 1.1 # Suppress logging of changes to LR unless the relative change exceeds this # threshold (prevents linear warm up from spamming the training log) __C.SOLVER.LOG_LR_CHANGE_THRESHOLD = 1.1 # ---------------------------------------------------------------------------- # # Fast R-CNN options # ---------------------------------------------------------------------------- # __C.FAST_RCNN = AttrDict() # The type of RoI head to use for bounding box classification and regression # The string must match a function that is imported in modeling.model_builder # (e.g., 'head_builder.add_roi_2mlp_head' to specify a two hidden layer MLP) __C.FAST_RCNN.ROI_BOX_HEAD = '' # Hidden layer dimension when using an MLP for the RoI box head __C.FAST_RCNN.MLP_HEAD_DIM = 1024 # Hidden Conv layer dimension when using Convs for the RoI box head __C.FAST_RCNN.CONV_HEAD_DIM = 256 # Number of stacked Conv layers in the RoI box head __C.FAST_RCNN.NUM_STACKED_CONVS = 4 # RoI transformation function (e.g., RoIPool or RoIAlign) # (RoIPoolF is the same as RoIPool; ignore the trailing 'F') __C.FAST_RCNN.ROI_XFORM_METHOD = 'RoIPoolF' # Number of grid sampling points in RoIAlign (usually use 2) # Only applies to RoIAlign __C.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO = 0 # RoI transform output resolution # Note: some models may have constraints on what they can use, e.g. 
they use # pretrained FC layers like in VGG16, and will ignore this option __C.FAST_RCNN.ROI_XFORM_RESOLUTION = 14 # ---------------------------------------------------------------------------- # # RPN options # ---------------------------------------------------------------------------- # __C.RPN = AttrDict() # [Inferred value; do not set directly in a config] # Indicates that the model contains an RPN subnetwork __C.RPN.RPN_ON = False # RPN anchor sizes given in absolute pixels w.r.t. the scaled network input # Note: these options are *not* used by FPN RPN; see FPN.RPN* options __C.RPN.SIZES = (64, 128, 256, 512) # Stride of the feature map that RPN is attached __C.RPN.STRIDE = 16 # RPN anchor aspect ratios __C.RPN.ASPECT_RATIOS = (0.5, 1, 2) # ---------------------------------------------------------------------------- # # FPN options # ---------------------------------------------------------------------------- # __C.FPN = AttrDict() # FPN is enabled if True __C.FPN.FPN_ON = False # Channel dimension of the FPN feature levels __C.FPN.DIM = 256 # Initialize the lateral connections to output zero if True __C.FPN.ZERO_INIT_LATERAL = False # Stride of the coarsest FPN level # This is needed so the input can be padded properly __C.FPN.COARSEST_STRIDE = 32 # # FPN may be used for just RPN, just object detection, or both # # Use FPN for RoI transform for object detection if True __C.FPN.MULTILEVEL_ROIS = False # Hyperparameters for the RoI-to-FPN level mapping heuristic __C.FPN.ROI_CANONICAL_SCALE = 224 # s0 __C.FPN.ROI_CANONICAL_LEVEL = 4 # k0: where s0 maps to # Coarsest level of the FPN pyramid __C.FPN.ROI_MAX_LEVEL = 5 # Finest level of the FPN pyramid __C.FPN.ROI_MIN_LEVEL = 2 # Use FPN for RPN if True __C.FPN.MULTILEVEL_RPN = False # Coarsest level of the FPN pyramid __C.FPN.RPN_MAX_LEVEL = 6 # Finest level of the FPN pyramid __C.FPN.RPN_MIN_LEVEL = 2 # FPN RPN anchor aspect ratios __C.FPN.RPN_ASPECT_RATIOS = (0.5, 1, 2) # RPN anchors start at this size on 
RPN_MIN_LEVEL # The anchor size doubled each level after that # With a default of 32 and levels 2 to 6, we get anchor sizes of 32 to 512 __C.FPN.RPN_ANCHOR_START_SIZE = 32 # Use extra FPN levels, as done in the RetinaNet paper __C.FPN.EXTRA_CONV_LEVELS = False # Use GroupNorm in the FPN-specific layers (lateral, etc.) __C.FPN.USE_GN = False # ---------------------------------------------------------------------------- # # Mask R-CNN options ("MRCNN" means Mask R-CNN) # ---------------------------------------------------------------------------- # __C.MRCNN = AttrDict() # The type of RoI head to use for instance mask prediction # The string must match a function that is imported in modeling.model_builder # (e.g., 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs') __C.MRCNN.ROI_MASK_HEAD = '' # Resolution of mask predictions __C.MRCNN.RESOLUTION = 14 # RoI transformation function and associated options __C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign' # RoI transformation function (e.g., RoIPool or RoIAlign) __C.MRCNN.ROI_XFORM_RESOLUTION = 7 # Number of grid sampling points in RoIAlign (usually use 2) # Only applies to RoIAlign __C.MRCNN.ROI_XFORM_SAMPLING_RATIO = 0 # Number of channels in the mask head __C.MRCNN.DIM_REDUCED = 256 # Use dilated convolution in the mask head __C.MRCNN.DILATION = 2 # Upsample the predicted masks by this factor __C.MRCNN.UPSAMPLE_RATIO = 1 # Use a fully-connected layer to predict the final masks instead of a conv layer __C.MRCNN.USE_FC_OUTPUT = False # Weight initialization method for the mask head and mask output layers __C.MRCNN.CONV_INIT = 'GaussianFill' # Use class specific mask predictions if True (otherwise use class agnostic mask # predictions) __C.MRCNN.CLS_SPECIFIC_MASK = True # Multi-task loss weight for masks __C.MRCNN.WEIGHT_LOSS_MASK = 1.0 # Binarization threshold for converting soft masks to hard masks __C.MRCNN.THRESH_BINARIZE = 0.5 # ---------------------------------------------------------------------------- # # Keypoint 
Mask R-CNN options ("KRCNN" = Mask R-CNN with Keypoint support) # ---------------------------------------------------------------------------- # __C.KRCNN = AttrDict() # The type of RoI head to use for instance keypoint prediction # The string must match a function that is imported in modeling.model_builder # (e.g., 'keypoint_rcnn_heads.add_roi_pose_head_v1convX') __C.KRCNN.ROI_KEYPOINTS_HEAD = '' # Output size (and size loss is computed on), e.g., 56x56 __C.KRCNN.HEATMAP_SIZE = -1 # Use bilinear interpolation to upsample the final heatmap by this factor __C.KRCNN.UP_SCALE = -1 # Apply a ConvTranspose layer to the hidden representation computed by the # keypoint head prior to predicting the per-keypoint heatmaps __C.KRCNN.USE_DECONV = False # Channel dimension of the hidden representation produced by the ConvTranspose __C.KRCNN.DECONV_DIM = 256 # Use a ConvTranspose layer to predict the per-keypoint heatmaps __C.KRCNN.USE_DECONV_OUTPUT = False # Use dilation in the keypoint head __C.KRCNN.DILATION = 1 # Size of the kernels to use in all ConvTranspose operations __C.KRCNN.DECONV_KERNEL = 4 # Number of keypoints in the dataset (e.g., 17 for COCO) __C.KRCNN.NUM_KEYPOINTS = -1 # Number of stacked Conv layers in keypoint head __C.KRCNN.NUM_STACKED_CONVS = 8 # Dimension of the hidden representation output by the keypoint head __C.KRCNN.CONV_HEAD_DIM = 256 # Conv kernel size used in the keypoint head __C.KRCNN.CONV_HEAD_KERNEL = 3 # Conv kernel weight filling function __C.KRCNN.CONV_INIT = 'GaussianFill' # Use NMS based on OKS if True __C.KRCNN.NMS_OKS = False # Source of keypoint confidence # Valid options: ('bbox', 'logit', 'prob') __C.KRCNN.KEYPOINT_CONFIDENCE = 'bbox' # Standard ROI XFORM options (see FAST_RCNN or MRCNN options) __C.KRCNN.ROI_XFORM_METHOD = 'RoIAlign' __C.KRCNN.ROI_XFORM_RESOLUTION = 7 __C.KRCNN.ROI_XFORM_SAMPLING_RATIO = 0 # Minimum number of labeled keypoints that must exist in a minibatch (otherwise # the minibatch is discarded) 
__C.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH = 20 # When inferring the keypoint locations from the heatmap, don't scale the heatmap # below this minimum size __C.KRCNN.INFERENCE_MIN_SIZE = 0 # Multi-task loss weight to use for keypoints # Recommended values: # - use 1.0 if KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is True # - use 4.0 if KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False __C.KRCNN.LOSS_WEIGHT = 1.0 # Normalize by the total number of visible keypoints in the minibatch if True. # Otherwise, normalize by the total number of keypoints that could ever exist # in the minibatch. See comments in modeling.model_builder.add_keypoint_losses # for detailed discussion. __C.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS = True # ---------------------------------------------------------------------------- # # R-FCN options # ---------------------------------------------------------------------------- # __C.RFCN = AttrDict() # Position-sensitive RoI pooling output grid size (height and width) __C.RFCN.PS_GRID_SIZE = 3 # ---------------------------------------------------------------------------- # # ResNets options ("ResNets" = ResNet and ResNeXt) # ---------------------------------------------------------------------------- # __C.RESNETS = AttrDict() # Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt __C.RESNETS.NUM_GROUPS = 1 # Baseline width of each group __C.RESNETS.WIDTH_PER_GROUP = 64 # Place the stride 2 conv on the 1x1 filter # Use True only for the original MSRA ResNet; use False for C2 and Torch models __C.RESNETS.STRIDE_1X1 = True # Residual transformation function __C.RESNETS.TRANS_FUNC = 'bottleneck_transformation' # ResNet's stem function (conv1 and pool1) __C.RESNETS.STEM_FUNC = 'basic_bn_stem' # ResNet's shortcut function __C.RESNETS.SHORTCUT_FUNC = 'basic_bn_shortcut' # Apply dilation in stage "res5" __C.RESNETS.RES5_DILATION = 1 # ---------------------------------------------------------------------------- # # GroupNorm options # 
---------------------------------------------------------------------------- # __C.GROUP_NORM = AttrDict() # Number of dimensions per group in GroupNorm (-1 if using NUM_GROUPS) __C.GROUP_NORM.DIM_PER_GP = -1 # Number of groups in GroupNorm (-1 if using DIM_PER_GP) __C.GROUP_NORM.NUM_GROUPS = 32 # GroupNorm's small constant in the denominator __C.GROUP_NORM.EPSILON = 1e-5 # ---------------------------------------------------------------------------- # # Misc options # ---------------------------------------------------------------------------- # # Number of GPUs to use (applies to both training and testing) __C.NUM_GPUS = 1 # Use NCCL for all reduce, otherwise use muji # Warning: if set to True, you may experience deadlocks __C.USE_NCCL = False # The mapping from image coordinates to feature map coordinates might cause # some boxes that are distinct in image space to become identical in feature # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor # for identifying duplicate boxes. # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 __C.DEDUP_BOXES = 1 / 16. # Clip bounding box transformation predictions to prevent np.exp from # overflowing # Heuristic choice based on that would scale a 16 pixel anchor up to 1000 pixels __C.BBOX_XFORM_CLIP = np.log(1000. / 16.) 
# Pixel mean values (BGR order) as a (1, 1, 3) array # We use the same pixel mean for all networks even though it's not exactly what # they were trained with # "Fun" fact: the history of where these values come from is lost __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) # For reproducibility...but not really because modern fast GPU libraries use # non-deterministic op implementations __C.RNG_SEED = 3 # A small number that's used many times __C.EPS = 1e-14 # Root directory of project __C.ROOT_DIR = os.getcwd() # Output basedir __C.OUTPUT_DIR = '/tmp' # Name (or path to) the matlab executable __C.MATLAB = 'matlab' # Reduce memory usage with memonger gradient blob sharing __C.MEMONGER = True # Further reduce memory by allowing forward pass activations to be shared when # possible. Note that this will cause activation blob inspection (values, # shapes, etc.) to be meaningless when activation blobs are reused. __C.MEMONGER_SHARE_ACTIVATIONS = False # Dump detection visualizations __C.VIS = False # Score threshold for visualization __C.VIS_TH = 0.9 # Expected results should take the form of a list of expectations, each # specified by four elements (dataset, task, metric, expected value). 
For # example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387]] __C.EXPECTED_RESULTS = [] # Absolute and relative tolerance to use when comparing to EXPECTED_RESULTS __C.EXPECTED_RESULTS_RTOL = 0.1 __C.EXPECTED_RESULTS_ATOL = 0.005 # When the expected value specifies a mean and standard deviation, we check # that the actual value is within mean +/- SIGMA_TOL * std __C.EXPECTED_RESULTS_SIGMA_TOL = 4 # Set to send email in case of an EXPECTED_RESULTS failure __C.EXPECTED_RESULTS_EMAIL = '' # Models and proposals referred to by URL are downloaded to a local cache # specified by DOWNLOAD_CACHE __C.DOWNLOAD_CACHE = '/tmp/detectron-download-cache' # ---------------------------------------------------------------------------- # # Cluster options # ---------------------------------------------------------------------------- # __C.CLUSTER = AttrDict() # Flag to indicate if the code is running in a cluster environment __C.CLUSTER.ON_CLUSTER = False # ---------------------------------------------------------------------------- # # Deprecated options # If an option is removed from the code and you don't want to break existing # yaml configs, you can add the full config key as a string to the set below. # ---------------------------------------------------------------------------- # _DEPRECATED_KEYS = set( { 'FINAL_MSG', 'MODEL.DILATION', 'ROOT_GPU_ID', 'RPN.ON', 'TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED', 'TRAIN.DROPOUT', 'USE_GPU_NMS', 'TEST.NUM_TEST_IMAGES', } ) # ---------------------------------------------------------------------------- # # Renamed options # If you rename a config option, record the mapping from the old name to the new # name in the dictionary below. Optionally, if the type also changed, you can # make the value a tuple that specifies first the renamed key and then # instructions for how to edit the config file. 
# ---------------------------------------------------------------------------- # _RENAMED_KEYS = { 'EXAMPLE.RENAMED.KEY': 'EXAMPLE.KEY', # Dummy example to follow 'MODEL.PS_GRID_SIZE': 'RFCN.PS_GRID_SIZE', 'MODEL.ROI_HEAD': 'FAST_RCNN.ROI_BOX_HEAD', 'MRCNN.MASK_HEAD_NAME': 'MRCNN.ROI_MASK_HEAD', 'TRAIN.DATASET': ( 'TRAIN.DATASETS', "Also convert to a tuple, e.g., " + "'coco_2014_train' -> ('coco_2014_train',) or " + "'coco_2014_train:coco_2014_valminusminival' -> " + "('coco_2014_train', 'coco_2014_valminusminival')" ), 'TRAIN.PROPOSAL_FILE': ( 'TRAIN.PROPOSAL_FILES', "Also convert to a tuple, e.g., " + "'path/to/file' -> ('path/to/file',) or " + "'path/to/file1:path/to/file2' -> " + "('path/to/file1', 'path/to/file2')" ), 'TEST.SCALES': ( 'TEST.SCALE', "Also convert from a tuple, e.g. (600, ), " + "to a integer, e.g. 600." ), 'TEST.DATASET': ( 'TEST.DATASETS', "Also convert from a string, e.g 'coco_2014_minival', " + "to a tuple, e.g. ('coco_2014_minival', )." ), 'TEST.PROPOSAL_FILE': ( 'TEST.PROPOSAL_FILES', "Also convert from a string, e.g. '/path/to/props.pkl', " + "to a tuple, e.g. ('/path/to/props.pkl', )." ), } # ---------------------------------------------------------------------------- # # Renamed modules # If a module containing a data structure used in the config (e.g. AttrDict) # is renamed/moved and you don't want to break loading of existing yaml configs # (e.g. from weights files) you can specify the renamed module below. # ---------------------------------------------------------------------------- # _RENAMED_MODULES = { 'utils.collections': 'detectron.utils.collections', } def assert_and_infer_cfg(cache_urls=True, make_immutable=True): """Call this function in your script after you have finished setting all cfg values that are necessary (e.g., merging a config from a file, merging command line config options, etc.). 
By default, this function will also mark the global cfg as immutable to prevent changing the global cfg settings during script execution (which can lead to hard to debug errors or code that's harder to understand than is necessary). """ if __C.MODEL.RPN_ONLY or __C.MODEL.FASTER_RCNN: __C.RPN.RPN_ON = True if __C.RPN.RPN_ON or __C.RETINANET.RETINANET_ON: __C.TEST.PRECOMPUTED_PROPOSALS = False if cache_urls: cache_cfg_urls() if make_immutable: cfg.immutable(True) def cache_cfg_urls(): """Download URLs in the config, cache them locally, and rewrite cfg to make use of the locally cached file. """ __C.TRAIN.WEIGHTS = cache_url(__C.TRAIN.WEIGHTS, __C.DOWNLOAD_CACHE) __C.TEST.WEIGHTS = cache_url(__C.TEST.WEIGHTS, __C.DOWNLOAD_CACHE) __C.TRAIN.PROPOSAL_FILES = tuple( cache_url(f, __C.DOWNLOAD_CACHE) for f in __C.TRAIN.PROPOSAL_FILES ) __C.TEST.PROPOSAL_FILES = tuple( cache_url(f, __C.DOWNLOAD_CACHE) for f in __C.TEST.PROPOSAL_FILES ) def get_output_dir(datasets, training=True): """Get the output directory determined by the current global config.""" assert isinstance(datasets, tuple([tuple, list] + list(six.string_types))), \ 'datasets argument must be of type tuple, list or string' is_string = isinstance(datasets, six.string_types) dataset_name = datasets if is_string else ':'.join(datasets) tag = 'train' if training else 'test' # //// outdir = osp.join(__C.OUTPUT_DIR, tag, dataset_name, __C.MODEL.TYPE) if not osp.exists(outdir): os.makedirs(outdir) return outdir def load_cfg(cfg_to_load): """Wrapper around yaml.load used for maintaining backward compatibility""" file_types = [file, io.IOBase] if six.PY2 else [io.IOBase] # noqa false positive expected_types = tuple(file_types + list(six.string_types)) assert isinstance(cfg_to_load, expected_types), \ 'Expected one of {}, got {}'.format(expected_types, type(cfg_to_load)) if isinstance(cfg_to_load, tuple(file_types)): cfg_to_load = ''.join(cfg_to_load.readlines()) for old_module, new_module in iteritems(_RENAMED_MODULES): # 
yaml object encoding: !!python/object/new:. old_module, new_module = 'new:' + old_module, 'new:' + new_module cfg_to_load = cfg_to_load.replace(old_module, new_module) # Import inline due to a circular dependency between env.py and config.py import detectron.utils.env as envu return envu.yaml_load(cfg_to_load) def merge_cfg_from_file(cfg_filename): """Load a yaml config file and merge it into the global config.""" with open(cfg_filename, 'r') as f: yaml_cfg = AttrDict(load_cfg(f)) _merge_a_into_b(yaml_cfg, __C) def merge_cfg_from_cfg(cfg_other): """Merge `cfg_other` into the global config.""" _merge_a_into_b(cfg_other, __C) def merge_cfg_from_list(cfg_list): """Merge config keys, values in a list (e.g., from command line) into the global config. For example, `cfg_list = ['TEST.NMS', 0.5]`. """ assert len(cfg_list) % 2 == 0 for full_key, v in zip(cfg_list[0::2], cfg_list[1::2]): if _key_is_deprecated(full_key): continue if _key_is_renamed(full_key): _raise_key_rename_error(full_key) key_list = full_key.split('.') d = __C for subkey in key_list[:-1]: assert subkey in d, 'Non-existent key: {}'.format(full_key) d = d[subkey] subkey = key_list[-1] assert subkey in d, 'Non-existent key: {}'.format(full_key) value = _decode_cfg_value(v) value = _check_and_coerce_cfg_value_type( value, d[subkey], subkey, full_key ) d[subkey] = value def _merge_a_into_b(a, b, stack=None): """Merge config dictionary a into config dictionary b, clobbering the options in b whenever they are also specified in a. """ assert isinstance(a, AttrDict), \ '`a` (cur type {}) must be an instance of {}'.format(type(a), AttrDict) assert isinstance(b, AttrDict), \ '`b` (cur type {}) must be an instance of {}'.format(type(b), AttrDict) for k, v_ in a.items(): full_key = '.'.join(stack) + '.' 
+ k if stack is not None else k # a must specify keys that are in b if k not in b: if _key_is_deprecated(full_key): continue elif _key_is_renamed(full_key): _raise_key_rename_error(full_key) else: raise KeyError('Non-existent config key: {}'.format(full_key)) v = copy.deepcopy(v_) v = _decode_cfg_value(v) v = _check_and_coerce_cfg_value_type(v, b[k], k, full_key) # Recursively merge dicts if isinstance(v, AttrDict): try: stack_push = [k] if stack is None else stack + [k] _merge_a_into_b(v, b[k], stack=stack_push) except BaseException: raise else: b[k] = v def _key_is_deprecated(full_key): if full_key in _DEPRECATED_KEYS: logger.warn( 'Deprecated config key (ignoring): {}'.format(full_key) ) return True return False def _key_is_renamed(full_key): return full_key in _RENAMED_KEYS def _raise_key_rename_error(full_key): new_key = _RENAMED_KEYS[full_key] if isinstance(new_key, tuple): msg = ' Note: ' + new_key[1] new_key = new_key[0] else: msg = '' raise KeyError( 'Key {} was renamed to {}; please update your config.{}'. format(full_key, new_key, msg) ) def _decode_cfg_value(v): """Decodes a raw config value (e.g., from a yaml config files or command line argument) into a Python object. """ # Configs parsed from raw yaml will contain dictionary keys that need to be # converted to AttrDict objects if isinstance(v, dict): return AttrDict(v) # All remaining processing is only applied to strings if not isinstance(v, six.string_types): return v # Try to interpret `v` as a: # string, number, tuple, list, dict, boolean, or None try: v = literal_eval(v) # The following two excepts allow v to pass through when it represents a # string. # # Longer explanation: # The type of v is always a string (before calling literal_eval), but # sometimes it *represents* a string and other times a data structure, like # a list. In the case that v represents a string, what we got back from the # yaml parser is 'foo' *without quotes* (so, not '"foo"'). 
literal_eval is # ok with '"foo"', but will raise a ValueError if given 'foo'. In other # cases, like paths (v = 'foo/bar' and not v = '"foo/bar"'), literal_eval # will raise a SyntaxError. except ValueError: pass except SyntaxError: pass return v def _check_and_coerce_cfg_value_type(value_a, value_b, key, full_key): """Checks that `value_a`, which is intended to replace `value_b` is of the right type. The type is correct if it matches exactly or is one of a few cases in which the type can be easily coerced. """ # The types must match (with some exceptions) type_b = type(value_b) type_a = type(value_a) if type_a is type_b: return value_a # Exceptions: numpy arrays, strings, tuple<->list if isinstance(value_b, np.ndarray): value_a = np.array(value_a, dtype=value_b.dtype) elif isinstance(value_b, six.string_types): value_a = str(value_a) elif isinstance(value_a, tuple) and isinstance(value_b, list): value_a = list(value_a) elif isinstance(value_a, list) and isinstance(value_b, tuple): value_a = tuple(value_a) else: raise ValueError( 'Type mismatch ({} vs. {}) with values ({} vs. {}) for config ' 'key: {}'.format(type_b, type_a, value_b, value_a, full_key) ) return value_a ================================================ FILE: detectron/core/rpn_generator.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## # # Based on: # -------------------------------------------------------- # Faster R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- """Functions for RPN proposal generation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import cv2 import datetime import logging import numpy as np import os from caffe2.python import core from caffe2.python import workspace from detectron.core.config import cfg from detectron.datasets import task_evaluation from detectron.datasets.json_dataset import JsonDataset from detectron.modeling import model_builder from detectron.utils.io import save_object from detectron.utils.timer import Timer import detectron.utils.blob as blob_utils import detectron.utils.c2 as c2_utils import detectron.utils.env as envu import detectron.utils.net as nu import detectron.utils.subprocess as subprocess_utils logger = logging.getLogger(__name__) def generate_rpn_on_dataset( weights_file, dataset_name, _proposal_file_ignored, output_dir, multi_gpu=False, gpu_id=0 ): """Run inference on a dataset.""" dataset = JsonDataset(dataset_name) test_timer = Timer() test_timer.tic() if multi_gpu: num_images = len(dataset.get_roidb()) _boxes, _scores, _ids, rpn_file = multi_gpu_generate_rpn_on_dataset( weights_file, dataset_name, _proposal_file_ignored, num_images, output_dir ) else: # Processes entire dataset range by default _boxes, _scores, _ids, rpn_file = generate_rpn_on_range( weights_file, dataset_name, _proposal_file_ignored, output_dir, gpu_id=gpu_id ) test_timer.toc() logger.info('Total inference time: {:.3f}s'.format(test_timer.average_time)) return evaluate_proposal_file(dataset, rpn_file, output_dir) def multi_gpu_generate_rpn_on_dataset( 
weights_file, dataset_name, _proposal_file_ignored, num_images, output_dir ): """Multi-gpu inference on a dataset.""" # Retrieve the test_net binary path binary_dir = envu.get_runtime_dir() binary_ext = envu.get_py_bin_ext() binary = os.path.join(binary_dir, 'test_net' + binary_ext) assert os.path.exists(binary), 'Binary \'{}\' not found'.format(binary) # Pass the target dataset via the command line opts = ['TEST.DATASETS', '("{}",)'.format(dataset_name)] opts += ['TEST.WEIGHTS', weights_file] # Run inference in parallel in subprocesses outputs = subprocess_utils.process_in_parallel( 'rpn_proposals', num_images, binary, output_dir, opts ) # Collate the results from each subprocess boxes, scores, ids = [], [], [] for rpn_data in outputs: boxes += rpn_data['boxes'] scores += rpn_data['scores'] ids += rpn_data['ids'] rpn_file = os.path.join(output_dir, 'rpn_proposals.pkl') cfg_yaml = envu.yaml_dump(cfg) save_object( dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml), rpn_file ) logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file))) return boxes, scores, ids, rpn_file def generate_rpn_on_range( weights_file, dataset_name, _proposal_file_ignored, output_dir, ind_range=None, gpu_id=0 ): """Run inference on all images in a dataset or over an index range of images in a dataset using a single GPU. 
""" assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN roidb, start_ind, end_ind, total_num_images = get_roidb( dataset_name, ind_range ) logger.info( 'Output will be saved to: {:s}'.format(os.path.abspath(output_dir)) ) model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id) nu.initialize_gpu_from_weights_file( model, weights_file, gpu_id=gpu_id, ) model_builder.add_inference_inputs(model) workspace.CreateNet(model.net) boxes, scores, ids = generate_proposals_on_roidb( model, roidb, start_ind=start_ind, end_ind=end_ind, total_num_images=total_num_images, gpu_id=gpu_id, ) cfg_yaml = envu.yaml_dump(cfg) if ind_range is not None: rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range) else: rpn_name = 'rpn_proposals.pkl' rpn_file = os.path.join(output_dir, rpn_name) save_object( dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml), rpn_file ) logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file))) return boxes, scores, ids, rpn_file def generate_proposals_on_roidb( model, roidb, start_ind=None, end_ind=None, total_num_images=None, gpu_id=0, ): """Generate RPN proposals on all images in an imdb.""" _t = Timer() num_images = len(roidb) roidb_boxes = [[] for _ in range(num_images)] roidb_scores = [[] for _ in range(num_images)] roidb_ids = [[] for _ in range(num_images)] if start_ind is None: start_ind = 0 end_ind = num_images total_num_images = num_images for i in range(num_images): roidb_ids[i] = roidb[i]['id'] im = cv2.imread(roidb[i]['image']) with c2_utils.NamedCudaScope(gpu_id): _t.tic() roidb_boxes[i], roidb_scores[i] = im_proposals(model, im) _t.toc() if i % 10 == 0: ave_time = _t.average_time eta_seconds = ave_time * (num_images - i - 1) eta = str(datetime.timedelta(seconds=int(eta_seconds))) logger.info( ( 'rpn_generate: range [{:d}, {:d}] of {:d}: ' '{:d}/{:d} {:.3f}s (eta: {})' ).format( start_ind + 1, end_ind, total_num_images, start_ind + i + 1, start_ind + num_images, ave_time, eta ) ) return roidb_boxes, 
roidb_scores, roidb_ids def im_proposals(model, im): """Generate RPN proposals on a single image.""" inputs = {} inputs['data'], im_scale, inputs['im_info'] = \ blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) for k, v in inputs.items(): workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False)) workspace.RunNet(model.net.Proto().name) if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN: k_max = cfg.FPN.RPN_MAX_LEVEL k_min = cfg.FPN.RPN_MIN_LEVEL rois_names = [ core.ScopedName('rpn_rois_fpn' + str(l)) for l in range(k_min, k_max + 1) ] score_names = [ core.ScopedName('rpn_roi_probs_fpn' + str(l)) for l in range(k_min, k_max + 1) ] blobs = workspace.FetchBlobs(rois_names + score_names) # Combine predictions across all levels and retain the top scoring boxes = np.concatenate(blobs[:len(rois_names)]) scores = np.concatenate(blobs[len(rois_names):]).squeeze() # Discussion: one could do NMS again after combining predictions from # the different FPN levels. Conceptually, it's probably the right thing # to do. For arbitrary reasons, the original FPN RPN implementation did # not do another round of NMS. inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N] scores = scores[inds] boxes = boxes[inds, :] else: boxes, scores = workspace.FetchBlobs( [core.ScopedName('rpn_rois'), core.ScopedName('rpn_roi_probs')] ) scores = scores.squeeze() # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding, # so we remove it since we just want to return boxes # Scale proposals back to the original input image scale boxes = boxes[:, 1:] / im_scale return boxes, scores def get_roidb(dataset_name, ind_range): """Get the roidb for the dataset specified in the global cfg. Optionally restrict it to a range of indices if ind_range is a pair of integers. 
""" dataset = JsonDataset(dataset_name) roidb = dataset.get_roidb() if ind_range is not None: total_num_images = len(roidb) start, end = ind_range roidb = roidb[start:end] else: start = 0 end = len(roidb) total_num_images = end return roidb, start, end, total_num_images def evaluate_proposal_file(dataset, proposal_file, output_dir): """Evaluate box proposal average recall.""" roidb = dataset.get_roidb(gt=True, proposal_file=proposal_file) results = task_evaluation.evaluate_box_proposals(dataset, roidb) task_evaluation.log_box_proposal_results(results) recall_file = os.path.join(output_dir, 'rpn_proposal_recall.pkl') save_object(results, recall_file) return results ================================================ FILE: detectron/core/test.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Inference functionality for most Detectron models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import defaultdict
import cv2
import logging
import numpy as np

from caffe2.python import core
from caffe2.python import workspace
import pycocotools.mask as mask_util

from detectron.core.config import cfg
from detectron.utils.timer import Timer
import detectron.core.test_retinanet as test_retinanet
import detectron.modeling.FPN as fpn
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils
import detectron.utils.image as image_utils
import detectron.utils.keypoints as keypoint_utils

logger = logging.getLogger(__name__)


def im_detect_all(model, im, box_proposals, timers=None):
    """Run box detection and, when enabled, mask and keypoint inference on
    one image.

    Arguments:
        model: the detection model to run
        im (ndarray): image to test
        box_proposals: proposals forwarded to the bbox detection functions
        timers (dict of Timer): optional timers, keyed by stage name; a fresh
            defaultdict(Timer) is used when None

    Returns:
        (cls_boxes, cls_segms, cls_keyps); cls_segms / cls_keyps are None when
        the corresponding task is disabled or no boxes were detected, and both
        are None for RetinaNet models.
    """
    if timers is None:
        timers = defaultdict(Timer)

    # Handle RetinaNet testing separately for now
    if cfg.RETINANET.RETINANET_ON:
        cls_boxes = test_retinanet.im_detect_bbox(model, im, timers)
        return cls_boxes, None, None

    timers['im_detect_bbox'].tic()
    if cfg.TEST.BBOX_AUG.ENABLED:
        scores, boxes, im_scale = im_detect_bbox_aug(model, im, box_proposals)
    else:
        scores, boxes, im_scale = im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
        )
    timers['im_detect_bbox'].toc()

    # score and boxes are from the whole image after score thresholding and nms
    # (they are not separated by class)
    # cls_boxes boxes and scores are separated by class and in the format used
    # for evaluating results
    timers['misc_bbox'].tic()
    scores, boxes, cls_boxes = box_results_with_nms_and_limit(scores, boxes)
    timers['misc_bbox'].toc()

    if cfg.MODEL.MASK_ON and boxes.shape[0] > 0:
        timers['im_detect_mask'].tic()
        if cfg.TEST.MASK_AUG.ENABLED:
            masks = im_detect_mask_aug(model, im, boxes)
        else:
            masks = im_detect_mask(model, im_scale, boxes)
        timers['im_detect_mask'].toc()

        timers['misc_mask'].tic()
        cls_segms = segm_results(
            cls_boxes, masks, boxes, im.shape[0], im.shape[1]
        )
        timers['misc_mask'].toc()
    else:
        cls_segms = None

    if cfg.MODEL.KEYPOINTS_ON and boxes.shape[0] > 0:
        timers['im_detect_keypoints'].tic()
        if cfg.TEST.KPS_AUG.ENABLED:
            heatmaps = im_detect_keypoints_aug(model, im, boxes)
        else:
            heatmaps = im_detect_keypoints(model, im_scale, boxes)
        timers['im_detect_keypoints'].toc()

        timers['misc_keypoints'].tic()
        cls_keyps = keypoint_results(cls_boxes, heatmaps, boxes)
        timers['misc_keypoints'].toc()
    else:
        cls_keyps = None

    return cls_boxes, cls_segms, cls_keyps


def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`.

    Returns the image scale reported by blob_utils.get_image_blob.
    """
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        im, target_scale, target_max_size
    )
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale


def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        target_scale: scale setting forwarded to _get_blobs
        target_max_size: max size setting forwarded to _get_blobs
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale: the image scale used in the input blob (as returned by
            _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Dot product with widely spaced weights folds each rounded roi row
        # into one value so that np.unique can compare whole rows
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Repeat the single class-agnostic box once per class
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale


def im_detect_bbox_aug(model, im, box_proposals=None):
    """Performs bbox detection with test-time augmentations.

    Takes the model, the image and optional box proposals, and returns
    (scores, boxes, im_scale) like im_detect_bbox; the scale and max size come
    from cfg.TEST. The returned im_scale is that of the un-augmented image.
    """
    assert not cfg.TEST.BBOX_AUG.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'
    assert not cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION' or \
        cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION', \
        'Coord heuristic must be union whenever score heuristic is union'
    assert not cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION' or \
        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \
        'Score heuristic must be union whenever coord heuristic is union'
    assert not cfg.MODEL.FASTER_RCNN or \
        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \
        'Union heuristic must be used to combine Faster RCNN predictions'

    # Collect detections computed under different transformations
    scores_ts = []
    boxes_ts = []

    def add_preds_t(scores_t, boxes_t):
        scores_ts.append(scores_t)
        boxes_ts.append(boxes_t)

    # Perform detection on the horizontally flipped image
    if cfg.TEST.BBOX_AUG.H_FLIP:
        scores_hf, boxes_hf, _ = im_detect_bbox_hflip(
            model,
            im,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            box_proposals=box_proposals
        )
        add_preds_t(scores_hf, boxes_hf)

    # Compute detections at different scales
    for scale in cfg.TEST.BBOX_AUG.SCALES:
        max_size = cfg.TEST.BBOX_AUG.MAX_SIZE
        scores_scl, boxes_scl = im_detect_bbox_scale(
            model, im, scale, max_size, box_proposals
        )
        add_preds_t(scores_scl, boxes_scl)

        if cfg.TEST.BBOX_AUG.SCALE_H_FLIP:
            scores_scl_hf, boxes_scl_hf = im_detect_bbox_scale(
                model, im, scale, max_size, box_proposals, hflip=True
            )
            add_preds_t(scores_scl_hf, boxes_scl_hf)

    # Perform detection at different aspect ratios
    for aspect_ratio in cfg.TEST.BBOX_AUG.ASPECT_RATIOS:
        scores_ar, boxes_ar = im_detect_bbox_aspect_ratio(
            model, im, aspect_ratio, box_proposals
        )
        add_preds_t(scores_ar, boxes_ar)

        if cfg.TEST.BBOX_AUG.ASPECT_RATIO_H_FLIP:
            scores_ar_hf, boxes_ar_hf = im_detect_bbox_aspect_ratio(
                model, im, aspect_ratio, box_proposals, hflip=True
            )
            add_preds_t(scores_ar_hf, boxes_ar_hf)

    # Compute detections for the original image (identity transform) last to
    # ensure that the Caffe2 workspace is populated with blobs corresponding
    # to the original image on return (postcondition of im_detect_bbox)
    scores_i, boxes_i, im_scale_i = im_detect_bbox(
        model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
    )
    add_preds_t(scores_i, boxes_i)

    # Combine the predicted scores
    if cfg.TEST.BBOX_AUG.SCORE_HEUR == 'ID':
        scores_c = scores_i
    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'AVG':
        scores_c = np.mean(scores_ts, axis=0)
    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION':
        scores_c = np.vstack(scores_ts)
    else:
        raise NotImplementedError(
            'Score heur {} not supported'.format(cfg.TEST.BBOX_AUG.SCORE_HEUR)
        )

    # Combine the predicted boxes
    if cfg.TEST.BBOX_AUG.COORD_HEUR == 'ID':
        boxes_c = boxes_i
    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'AVG':
        boxes_c = np.mean(boxes_ts, axis=0)
    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION':
        boxes_c = np.vstack(boxes_ts)
    else:
        raise NotImplementedError(
            'Coord heur {} not supported'.format(cfg.TEST.BBOX_AUG.COORD_HEUR)
        )

    return scores_c, boxes_c, im_scale_i


def im_detect_bbox_hflip(
    model, im, target_scale, target_max_size, box_proposals=None
):
    """Performs bbox detection on the horizontally flipped image.

    Takes the same inputs as im_detect_bbox (proposals are passed as
    `box_proposals`) and returns (scores, boxes, im_scale), with the boxes
    flipped back into the coordinates of the un-flipped image.
    """
    # Compute predictions on the flipped image
    im_hf = im[:, ::-1, :]
    im_width = im.shape[1]

    if not cfg.MODEL.FASTER_RCNN:
        box_proposals_hf = box_utils.flip_boxes(box_proposals, im_width)
    else:
        box_proposals_hf = None

    scores_hf, boxes_hf, im_scale = im_detect_bbox(
        model, im_hf, target_scale, target_max_size, boxes=box_proposals_hf
    )

    # Invert the detections computed on the flipped image
    boxes_inv = box_utils.flip_boxes(boxes_hf, im_width)

    return scores_hf, boxes_inv, im_scale


def im_detect_bbox_scale(
    model, im, target_scale, target_max_size, box_proposals=None, hflip=False
):
    """Computes bbox detections at the given scale.
    Returns predictions in the original image space.

    When `hflip` is True the detection runs on the horizontally flipped
    image. Returns (scores, boxes); the image scale is dropped.
    """
    if hflip:
        scores_scl, boxes_scl, _ = im_detect_bbox_hflip(
            model, im, target_scale, target_max_size, box_proposals=box_proposals
        )
    else:
        scores_scl, boxes_scl, _ = im_detect_bbox(
            model, im, target_scale, target_max_size, boxes=box_proposals
        )
    return scores_scl, boxes_scl


def im_detect_bbox_aspect_ratio(
    model, im, aspect_ratio, box_proposals=None, hflip=False
):
    """Computes bbox detections at the given width-relative aspect ratio.
    Returns predictions in the original image space.

    When `hflip` is True the detection runs on the horizontally flipped
    transformed image. Returns (scores, boxes); the image scale is dropped.
    """
    # Compute predictions on the transformed image
    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)

    if not cfg.MODEL.FASTER_RCNN:
        box_proposals_ar = box_utils.aspect_ratio(box_proposals, aspect_ratio)
    else:
        box_proposals_ar = None

    if hflip:
        scores_ar, boxes_ar, _ = im_detect_bbox_hflip(
            model,
            im_ar,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            box_proposals=box_proposals_ar
        )
    else:
        scores_ar, boxes_ar, _ = im_detect_bbox(
            model,
            im_ar,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            boxes=box_proposals_ar
        )

    # Invert the detected boxes
    boxes_inv = box_utils.aspect_ratio(boxes_ar, 1.0 / aspect_ratio)

    return scores_ar, boxes_inv


def im_detect_mask(model, im_scale, boxes):
    """Infer instance segmentation masks.
This function must be called after im_detect_bbox as it assumes that the Caffe2 workspace is already populated with the necessary blobs. Arguments: model (DetectionModelHelper): the detection model to use im_scales (list): image blob scales as returned by im_detect_bbox boxes (ndarray): R x 4 array of bounding box detections (e.g., as returned by im_detect_bbox) Returns: pred_masks (ndarray): R x K x M x M array of class specific soft masks output by the network (must be processed by segm_results to convert into hard masks in the original image coordinate space) """ M = cfg.MRCNN.RESOLUTION if boxes.shape[0] == 0: pred_masks = np.zeros((0, M, M), np.float32) return pred_masks inputs = {'mask_rois': _get_rois_blob(boxes, im_scale)} # Add multi-level rois for FPN if cfg.FPN.MULTILEVEL_ROIS: _add_multilevel_rois_for_test(inputs, 'mask_rois') for k, v in inputs.items(): workspace.FeedBlob(core.ScopedName(k), v) workspace.RunNet(model.mask_net.Proto().name) # Fetch masks pred_masks = workspace.FetchBlob( core.ScopedName('mask_fcn_probs') ).squeeze() if cfg.MRCNN.CLS_SPECIFIC_MASK: pred_masks = pred_masks.reshape([-1, cfg.MODEL.NUM_CLASSES, M, M]) else: pred_masks = pred_masks.reshape([-1, 1, M, M]) return pred_masks def im_detect_mask_aug(model, im, boxes): """Performs mask detection with test-time augmentations. 
Arguments: model (DetectionModelHelper): the detection model to use im (ndarray): BGR image to test boxes (ndarray): R x 4 array of bounding boxes Returns: masks (ndarray): R x K x M x M array of class specific soft masks """ assert not cfg.TEST.MASK_AUG.SCALE_SIZE_DEP, \ 'Size dependent scaling not implemented' # Collect masks computed under different transformations masks_ts = [] # Compute masks for the original image (identity transform) im_scale_i = im_conv_body_only(model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) masks_i = im_detect_mask(model, im_scale_i, boxes) masks_ts.append(masks_i) # Perform mask detection on the horizontally flipped image if cfg.TEST.MASK_AUG.H_FLIP: masks_hf = im_detect_mask_hflip( model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes ) masks_ts.append(masks_hf) # Compute detections at different scales for scale in cfg.TEST.MASK_AUG.SCALES: max_size = cfg.TEST.MASK_AUG.MAX_SIZE masks_scl = im_detect_mask_scale(model, im, scale, max_size, boxes) masks_ts.append(masks_scl) if cfg.TEST.MASK_AUG.SCALE_H_FLIP: masks_scl_hf = im_detect_mask_scale( model, im, scale, max_size, boxes, hflip=True ) masks_ts.append(masks_scl_hf) # Compute masks at different aspect ratios for aspect_ratio in cfg.TEST.MASK_AUG.ASPECT_RATIOS: masks_ar = im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes) masks_ts.append(masks_ar) if cfg.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP: masks_ar_hf = im_detect_mask_aspect_ratio( model, im, aspect_ratio, boxes, hflip=True ) masks_ts.append(masks_ar_hf) # Combine the predicted soft masks if cfg.TEST.MASK_AUG.HEUR == 'SOFT_AVG': masks_c = np.mean(masks_ts, axis=0) elif cfg.TEST.MASK_AUG.HEUR == 'SOFT_MAX': masks_c = np.amax(masks_ts, axis=0) elif cfg.TEST.MASK_AUG.HEUR == 'LOGIT_AVG': def logit(y): return -1.0 * np.log((1.0 - y) / np.maximum(y, 1e-20)) logit_masks = [logit(y) for y in masks_ts] logit_masks = np.mean(logit_masks, axis=0) masks_c = 1.0 / (1.0 + np.exp(-logit_masks)) else: raise NotImplementedError( 'Heuristic {} not 
supported'.format(cfg.TEST.MASK_AUG.HEUR) ) return masks_c def im_detect_mask_hflip(model, im, target_scale, target_max_size, boxes): """Performs mask detection on the horizontally flipped image. Function signature is the same as for im_detect_mask_aug. """ # Compute the masks for the flipped image im_hf = im[:, ::-1, :] boxes_hf = box_utils.flip_boxes(boxes, im.shape[1]) im_scale = im_conv_body_only(model, im_hf, target_scale, target_max_size) masks_hf = im_detect_mask(model, im_scale, boxes_hf) # Invert the predicted soft masks masks_inv = masks_hf[:, :, :, ::-1] return masks_inv def im_detect_mask_scale( model, im, target_scale, target_max_size, boxes, hflip=False ): """Computes masks at the given scale.""" if hflip: masks_scl = im_detect_mask_hflip( model, im, target_scale, target_max_size, boxes ) else: im_scale = im_conv_body_only(model, im, target_scale, target_max_size) masks_scl = im_detect_mask(model, im_scale, boxes) return masks_scl def im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes, hflip=False): """Computes mask detections at the given width-relative aspect ratio.""" # Perform mask detection on the transformed image im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio) boxes_ar = box_utils.aspect_ratio(boxes, aspect_ratio) if hflip: masks_ar = im_detect_mask_hflip( model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes_ar ) else: im_scale = im_conv_body_only( model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE ) masks_ar = im_detect_mask(model, im_scale, boxes_ar) return masks_ar def im_detect_keypoints(model, im_scale, boxes): """Infer instance keypoint poses. This function must be called after im_detect_bbox as it assumes that the Caffe2 workspace is already populated with the necessary blobs. 
Arguments: model (DetectionModelHelper): the detection model to use im_scales (list): image blob scales as returned by im_detect_bbox boxes (ndarray): R x 4 array of bounding box detections (e.g., as returned by im_detect_bbox) Returns: pred_heatmaps (ndarray): R x J x M x M array of keypoint location logits (softmax inputs) for each of the J keypoint types output by the network (must be processed by keypoint_results to convert into point predictions in the original image coordinate space) """ M = cfg.KRCNN.HEATMAP_SIZE if boxes.shape[0] == 0: pred_heatmaps = np.zeros((0, cfg.KRCNN.NUM_KEYPOINTS, M, M), np.float32) return pred_heatmaps inputs = {'keypoint_rois': _get_rois_blob(boxes, im_scale)} # Add multi-level rois for FPN if cfg.FPN.MULTILEVEL_ROIS: _add_multilevel_rois_for_test(inputs, 'keypoint_rois') for k, v in inputs.items(): workspace.FeedBlob(core.ScopedName(k), v) workspace.RunNet(model.keypoint_net.Proto().name) pred_heatmaps = workspace.FetchBlob(core.ScopedName('kps_score')).squeeze() # In case of 1 if pred_heatmaps.ndim == 3: pred_heatmaps = np.expand_dims(pred_heatmaps, axis=0) return pred_heatmaps def im_detect_keypoints_aug(model, im, boxes): """Computes keypoint predictions with test-time augmentations. 
Arguments: model (DetectionModelHelper): the detection model to use im (ndarray): BGR image to test boxes (ndarray): R x 4 array of bounding boxes Returns: heatmaps (ndarray): R x J x M x M array of keypoint location logits """ # Collect heatmaps predicted under different transformations heatmaps_ts = [] # Tag predictions computed under downscaling and upscaling transformations ds_ts = [] us_ts = [] def add_heatmaps_t(heatmaps_t, ds_t=False, us_t=False): heatmaps_ts.append(heatmaps_t) ds_ts.append(ds_t) us_ts.append(us_t) # Compute the heatmaps for the original image (identity transform) im_scale = im_conv_body_only(model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) heatmaps_i = im_detect_keypoints(model, im_scale, boxes) add_heatmaps_t(heatmaps_i) # Perform keypoints detection on the horizontally flipped image if cfg.TEST.KPS_AUG.H_FLIP: heatmaps_hf = im_detect_keypoints_hflip( model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes ) add_heatmaps_t(heatmaps_hf) # Compute detections at different scales for scale in cfg.TEST.KPS_AUG.SCALES: ds_scl = scale < cfg.TEST.SCALE us_scl = scale > cfg.TEST.SCALE heatmaps_scl = im_detect_keypoints_scale( model, im, scale, cfg.TEST.KPS_AUG.MAX_SIZE, boxes ) add_heatmaps_t(heatmaps_scl, ds_scl, us_scl) if cfg.TEST.KPS_AUG.SCALE_H_FLIP: heatmaps_scl_hf = im_detect_keypoints_scale( model, im, scale, cfg.TEST.KPS_AUG.MAX_SIZE, boxes, hflip=True ) add_heatmaps_t(heatmaps_scl_hf, ds_scl, us_scl) # Compute keypoints at different aspect ratios for aspect_ratio in cfg.TEST.KPS_AUG.ASPECT_RATIOS: heatmaps_ar = im_detect_keypoints_aspect_ratio( model, im, aspect_ratio, boxes ) add_heatmaps_t(heatmaps_ar) if cfg.TEST.KPS_AUG.ASPECT_RATIO_H_FLIP: heatmaps_ar_hf = im_detect_keypoints_aspect_ratio( model, im, aspect_ratio, boxes, hflip=True ) add_heatmaps_t(heatmaps_ar_hf) # Select the heuristic function for combining the heatmaps if cfg.TEST.KPS_AUG.HEUR == 'HM_AVG': np_f = np.mean elif cfg.TEST.KPS_AUG.HEUR == 'HM_MAX': np_f = np.amax else: raise 
NotImplementedError( 'Heuristic {} not supported'.format(cfg.TEST.KPS_AUG.HEUR) ) def heur_f(hms_ts): return np_f(hms_ts, axis=0) # Combine the heatmaps if cfg.TEST.KPS_AUG.SCALE_SIZE_DEP: heatmaps_c = combine_heatmaps_size_dep( heatmaps_ts, ds_ts, us_ts, boxes, heur_f ) else: heatmaps_c = heur_f(heatmaps_ts) return heatmaps_c def im_detect_keypoints_hflip(model, im, target_scale, target_max_size, boxes): """Computes keypoint predictions on the horizontally flipped image. Function signature is the same as for im_detect_keypoints_aug. """ # Compute keypoints for the flipped image im_hf = im[:, ::-1, :] boxes_hf = box_utils.flip_boxes(boxes, im.shape[1]) im_scale = im_conv_body_only(model, im_hf, target_scale, target_max_size) heatmaps_hf = im_detect_keypoints(model, im_scale, boxes_hf) # Invert the predicted keypoints heatmaps_inv = keypoint_utils.flip_heatmaps(heatmaps_hf) return heatmaps_inv def im_detect_keypoints_scale( model, im, target_scale, target_max_size, boxes, hflip=False ): """Computes keypoint predictions at the given scale.""" if hflip: heatmaps_scl = im_detect_keypoints_hflip( model, im, target_scale, target_max_size, boxes ) else: im_scale = im_conv_body_only(model, im, target_scale, target_max_size) heatmaps_scl = im_detect_keypoints(model, im_scale, boxes) return heatmaps_scl def im_detect_keypoints_aspect_ratio( model, im, aspect_ratio, boxes, hflip=False ): """Detects keypoints at the given width-relative aspect ratio.""" # Perform keypoint detectionon the transformed image im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio) boxes_ar = box_utils.aspect_ratio(boxes, aspect_ratio) if hflip: heatmaps_ar = im_detect_keypoints_hflip( model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes_ar ) else: im_scale = im_conv_body_only( model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE ) heatmaps_ar = im_detect_keypoints(model, im_scale, boxes_ar) return heatmaps_ar def combine_heatmaps_size_dep(hms_ts, ds_ts, us_ts, boxes, heur_f): """Combines heatmaps 
while taking object sizes into account.""" assert len(hms_ts) == len(ds_ts) and len(ds_ts) == len(us_ts), \ 'All sets of hms must be tagged with downscaling and upscaling flags' # Classify objects into small+medium and large based on their box areas areas = box_utils.boxes_area(boxes) sm_objs = areas < cfg.TEST.KPS_AUG.AREA_TH l_objs = areas >= cfg.TEST.KPS_AUG.AREA_TH # Combine heatmaps computed under different transformations for each object hms_c = np.zeros_like(hms_ts[0]) for i in range(hms_c.shape[0]): hms_to_combine = [] for hms_t, ds_t, us_t in zip(hms_ts, ds_ts, us_ts): # Discard downscaling predictions for small and medium objects if sm_objs[i] and ds_t: continue # Discard upscaling predictions for large objects if l_objs[i] and us_t: continue hms_to_combine.append(hms_t[i]) hms_c[i] = heur_f(hms_to_combine) return hms_c def box_results_with_nms_and_limit(scores, boxes): """Returns bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). `boxes` has shape (#detections, 4 * #classes), where each row represents a list of predicted bounding boxes for each of the object classes in the dataset (including the background class). The detections in each row originate from the same object proposal. `scores` has shape (#detection, #classes), where each row represents a list of object detection confidence scores for each of the object classes in the dataset (including the background class). `scores[i, j]`` corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`. 
""" num_classes = cfg.MODEL.NUM_CLASSES cls_boxes = [[] for _ in range(num_classes)] # Apply threshold on detection probabilities and apply NMS # Skip j = 0, because it's the background class for j in range(1, num_classes): inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] scores_j = scores[inds, j] boxes_j = boxes[inds, j * 4:(j + 1) * 4] dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype( np.float32, copy=False ) if cfg.TEST.SOFT_NMS.ENABLED: nms_dets, _ = box_utils.soft_nms( dets_j, sigma=cfg.TEST.SOFT_NMS.SIGMA, overlap_thresh=cfg.TEST.NMS, score_thresh=0.0001, method=cfg.TEST.SOFT_NMS.METHOD ) else: keep = box_utils.nms(dets_j, cfg.TEST.NMS) nms_dets = dets_j[keep, :] # Refine the post-NMS boxes using bounding-box voting if cfg.TEST.BBOX_VOTE.ENABLED: nms_dets = box_utils.box_voting( nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH, scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD ) cls_boxes[j] = nms_dets # Limit to max_per_image detections **over all classes** if cfg.TEST.DETECTIONS_PER_IM > 0: image_scores = np.hstack( [cls_boxes[j][:, -1] for j in range(1, num_classes)] ) if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] for j in range(1, num_classes): keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0] cls_boxes[j] = cls_boxes[j][keep, :] im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)]) boxes = im_results[:, :-1] scores = im_results[:, -1] return scores, boxes, cls_boxes def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w): num_classes = cfg.MODEL.NUM_CLASSES cls_segms = [[] for _ in range(num_classes)] mask_ind = 0 # To work around an issue with cv2.resize (it seems to automatically pad # with repeated border values), we manually zero-pad the masks by 1 pixel # prior to resizing back to the original image resolution. This prevents # "top hat" artifacts. We therefore need to expand the reference boxes by an # appropriate factor. 
M = cfg.MRCNN.RESOLUTION scale = (M + 2.0) / M ref_boxes = box_utils.expand_boxes(ref_boxes, scale) ref_boxes = ref_boxes.astype(np.int32) padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) # skip j = 0, because it's the background class for j in range(1, num_classes): segms = [] for _ in range(cls_boxes[j].shape[0]): if cfg.MRCNN.CLS_SPECIFIC_MASK: padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :] else: padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :] ref_box = ref_boxes[mask_ind, :] w = ref_box[2] - ref_box[0] + 1 h = ref_box[3] - ref_box[1] + 1 w = np.maximum(w, 1) h = np.maximum(h, 1) mask = cv2.resize(padded_mask, (w, h)) mask = np.array(mask > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8) im_mask = np.zeros((im_h, im_w), dtype=np.uint8) x_0 = max(ref_box[0], 0) x_1 = min(ref_box[2] + 1, im_w) y_0 = max(ref_box[1], 0) y_1 = min(ref_box[3] + 1, im_h) im_mask[y_0:y_1, x_0:x_1] = mask[ (y_0 - ref_box[1]):(y_1 - ref_box[1]), (x_0 - ref_box[0]):(x_1 - ref_box[0]) ] # Get RLE encoding used by the COCO evaluation API rle = mask_util.encode( np.array(im_mask[:, :, np.newaxis], order='F') )[0] segms.append(rle) mask_ind += 1 cls_segms[j] = segms assert mask_ind == masks.shape[0] return cls_segms def keypoint_results(cls_boxes, pred_heatmaps, ref_boxes): num_classes = cfg.MODEL.NUM_CLASSES cls_keyps = [[] for _ in range(num_classes)] person_idx = keypoint_utils.get_person_class_index() xy_preds = keypoint_utils.heatmaps_to_keypoints(pred_heatmaps, ref_boxes) # NMS OKS if cfg.KRCNN.NMS_OKS: keep = keypoint_utils.nms_oks(xy_preds, ref_boxes, 0.3) xy_preds = xy_preds[keep, :, :] ref_boxes = ref_boxes[keep, :] pred_heatmaps = pred_heatmaps[keep, :, :, :] cls_boxes[person_idx] = cls_boxes[person_idx][keep, :] kps = [xy_preds[i] for i in range(xy_preds.shape[0])] cls_keyps[person_idx] = kps return cls_keyps def _get_rois_blob(im_rois, im_scale): """Converts RoIs into network inputs. 
Arguments: im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates im_scale_factors (list): scale factors as returned by _get_image_blob Returns: blob (ndarray): R x 5 matrix of RoIs in the image pyramid with columns [level, x1, y1, x2, y2] """ rois, levels = _project_im_rois(im_rois, im_scale) rois_blob = np.hstack((levels, rois)) return rois_blob.astype(np.float32, copy=False) def _project_im_rois(im_rois, scales): """Project image RoIs into the image pyramid built by _get_image_blob. Arguments: im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates scales (list): scale factors as returned by _get_image_blob Returns: rois (ndarray): R x 4 matrix of projected RoI coordinates levels (ndarray): image pyramid levels used by each projected RoI """ rois = im_rois.astype(float, copy=False) * scales levels = np.zeros((im_rois.shape[0], 1), dtype=int) return rois, levels def _add_multilevel_rois_for_test(blobs, name): """Distributes a set of RoIs across FPN pyramid levels by creating new level specific RoI blobs. 
Arguments: blobs (dict): dictionary of blobs name (str): a key in 'blobs' identifying the source RoI blob Returns: [by ref] blobs (dict): new keys named by `name + 'fpn' + level` are added to dict each with a value that's an R_level x 5 ndarray of RoIs (see _get_rois_blob for format) """ lvl_min = cfg.FPN.ROI_MIN_LEVEL lvl_max = cfg.FPN.ROI_MAX_LEVEL lvls = fpn.map_rois_to_fpn_levels(blobs[name][:, 1:5], lvl_min, lvl_max) fpn.add_multilevel_roi_blobs( blobs, name, blobs[name], lvls, lvl_min, lvl_max ) def _get_blobs(im, rois, target_scale, target_max_size): """Convert an image and RoIs within that image into network inputs.""" blobs = {} blobs['data'], im_scale, blobs['im_info'] = \ blob_utils.get_image_blob(im, target_scale, target_max_size) if rois is not None: blobs['rois'] = _get_rois_blob(rois, im_scale) return blobs, im_scale ================================================ FILE: detectron/core/test_engine.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## """Test a Detectron network on an imdb (image database).""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from collections import defaultdict import cv2 import datetime import logging import numpy as np import os from caffe2.python import workspace from detectron.core.config import cfg from detectron.core.config import get_output_dir from detectron.core.rpn_generator import generate_rpn_on_dataset from detectron.core.rpn_generator import generate_rpn_on_range from detectron.core.test import im_detect_all from detectron.datasets import task_evaluation from detectron.datasets.json_dataset import JsonDataset from detectron.modeling import model_builder from detectron.utils.io import save_object from detectron.utils.timer import Timer import detectron.utils.c2 as c2_utils import detectron.utils.env as envu import detectron.utils.net as net_utils import detectron.utils.subprocess as subprocess_utils import detectron.utils.vis as vis_utils logger = logging.getLogger(__name__) def get_eval_functions(): # Determine which parent or child function should handle inference if cfg.MODEL.RPN_ONLY: child_func = generate_rpn_on_range parent_func = generate_rpn_on_dataset else: # Generic case that handles all network types other than RPN-only nets # and RetinaNet child_func = test_net parent_func = test_net_on_dataset return parent_func, child_func def get_inference_dataset(index, is_parent=True): assert is_parent or len(cfg.TEST.DATASETS) == 1, \ 'The child inference process can only work on a single dataset' dataset_name = cfg.TEST.DATASETS[index] if cfg.TEST.PRECOMPUTED_PROPOSALS: assert is_parent or len(cfg.TEST.PROPOSAL_FILES) == 1, \ 'The child inference process can only work on a single proposal file' assert len(cfg.TEST.PROPOSAL_FILES) == len(cfg.TEST.DATASETS), \ 'If proposals are used, one 
proposal file must be specified for ' \ 'each dataset' proposal_file = cfg.TEST.PROPOSAL_FILES[index] else: proposal_file = None return dataset_name, proposal_file def run_inference( weights_file, ind_range=None, multi_gpu_testing=False, gpu_id=0, check_expected_results=False, ): parent_func, child_func = get_eval_functions() is_parent = ind_range is None def result_getter(): if is_parent: # Parent case: # In this case we're either running inference on the entire dataset in a # single process or (if multi_gpu_testing is True) using this process to # launch subprocesses that each run inference on a range of the dataset all_results = {} for i in range(len(cfg.TEST.DATASETS)): dataset_name, proposal_file = get_inference_dataset(i) output_dir = get_output_dir(dataset_name, training=False) results = parent_func( weights_file, dataset_name, proposal_file, output_dir, multi_gpu=multi_gpu_testing ) all_results.update(results) return all_results else: # Subprocess child case: # In this case test_net was called via subprocess.Popen to execute on a # range of inputs on a single dataset dataset_name, proposal_file = get_inference_dataset(0, is_parent=False) output_dir = get_output_dir(dataset_name, training=False) return child_func( weights_file, dataset_name, proposal_file, output_dir, ind_range=ind_range, gpu_id=gpu_id ) all_results = result_getter() if check_expected_results and is_parent: task_evaluation.check_expected_results( all_results, atol=cfg.EXPECTED_RESULTS_ATOL, rtol=cfg.EXPECTED_RESULTS_RTOL ) task_evaluation.log_copy_paste_friendly_results(all_results) return all_results def test_net_on_dataset( weights_file, dataset_name, proposal_file, output_dir, multi_gpu=False, gpu_id=0 ): """Run inference on a dataset.""" dataset = JsonDataset(dataset_name) test_timer = Timer() test_timer.tic() if multi_gpu: num_images = len(dataset.get_roidb()) all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset( weights_file, dataset_name, proposal_file, num_images, 
output_dir ) else: all_boxes, all_segms, all_keyps = test_net( weights_file, dataset_name, proposal_file, output_dir, gpu_id=gpu_id ) test_timer.toc() logger.info('Total inference time: {:.3f}s'.format(test_timer.average_time)) results = task_evaluation.evaluate_all( dataset, all_boxes, all_segms, all_keyps, output_dir ) return results def multi_gpu_test_net_on_dataset( weights_file, dataset_name, proposal_file, num_images, output_dir ): """Multi-gpu inference on a dataset.""" binary_dir = envu.get_runtime_dir() binary_ext = envu.get_py_bin_ext() binary = os.path.join(binary_dir, 'test_net' + binary_ext) assert os.path.exists(binary), 'Binary \'{}\' not found'.format(binary) # Pass the target dataset and proposal file (if any) via the command line opts = ['TEST.DATASETS', '("{}",)'.format(dataset_name)] opts += ['TEST.WEIGHTS', weights_file] if proposal_file: opts += ['TEST.PROPOSAL_FILES', '("{}",)'.format(proposal_file)] # Run inference in parallel in subprocesses # Outputs will be a list of outputs from each subprocess, where the output # of each subprocess is the dictionary saved by test_net(). 
outputs = subprocess_utils.process_in_parallel( 'detection', num_images, binary, output_dir, opts ) # Collate the results from each subprocess all_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)] all_segms = [[] for _ in range(cfg.MODEL.NUM_CLASSES)] all_keyps = [[] for _ in range(cfg.MODEL.NUM_CLASSES)] for det_data in outputs: all_boxes_batch = det_data['all_boxes'] all_segms_batch = det_data['all_segms'] all_keyps_batch = det_data['all_keyps'] for cls_idx in range(1, cfg.MODEL.NUM_CLASSES): all_boxes[cls_idx] += all_boxes_batch[cls_idx] all_segms[cls_idx] += all_segms_batch[cls_idx] all_keyps[cls_idx] += all_keyps_batch[cls_idx] det_file = os.path.join(output_dir, 'detections.pkl') cfg_yaml = envu.yaml_dump(cfg) save_object( dict( all_boxes=all_boxes, all_segms=all_segms, all_keyps=all_keyps, cfg=cfg_yaml ), det_file ) logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file))) return all_boxes, all_segms, all_keyps def test_net( weights_file, dataset_name, proposal_file, output_dir, ind_range=None, gpu_id=0 ): """Run inference on all images in a dataset or over an index range of images in a dataset using a single GPU. """ assert not cfg.MODEL.RPN_ONLY, \ 'Use rpn_generate to generate proposals from RPN-only models' roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset( dataset_name, proposal_file, ind_range ) model = initialize_model_from_cfg(weights_file, gpu_id=gpu_id) num_images = len(roidb) num_classes = cfg.MODEL.NUM_CLASSES all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images) timers = defaultdict(Timer) for i, entry in enumerate(roidb): if cfg.TEST.PRECOMPUTED_PROPOSALS: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select only the rois # that have the gt_classes field set to 0, which means there's no # ground truth. 
box_proposals = entry['boxes'][entry['gt_classes'] == 0] if len(box_proposals) == 0: continue else: # Faster R-CNN type models generate proposals on-the-fly with an # in-network RPN; 1-stage models don't require proposals. box_proposals = None im = cv2.imread(entry['image']) with c2_utils.NamedCudaScope(gpu_id): cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all( model, im, box_proposals, timers ) extend_results(i, all_boxes, cls_boxes_i) if cls_segms_i is not None: extend_results(i, all_segms, cls_segms_i) if cls_keyps_i is not None: extend_results(i, all_keyps, cls_keyps_i) if i % 10 == 0: # Reduce log file size ave_total_time = np.sum([t.average_time for t in timers.values()]) eta_seconds = ave_total_time * (num_images - i - 1) eta = str(datetime.timedelta(seconds=int(eta_seconds))) det_time = ( timers['im_detect_bbox'].average_time + timers['im_detect_mask'].average_time + timers['im_detect_keypoints'].average_time ) misc_time = ( timers['misc_bbox'].average_time + timers['misc_mask'].average_time + timers['misc_keypoints'].average_time ) logger.info( ( 'im_detect: range [{:d}, {:d}] of {:d}: ' '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})' ).format( start_ind + 1, end_ind, total_num_images, start_ind + i + 1, start_ind + num_images, det_time, misc_time, eta ) ) if cfg.VIS: im_name = os.path.splitext(os.path.basename(entry['image']))[0] vis_utils.vis_one_image( im[:, :, ::-1], '{:d}_{:s}'.format(i, im_name), os.path.join(output_dir, 'vis'), cls_boxes_i, segms=cls_segms_i, keypoints=cls_keyps_i, thresh=cfg.VIS_TH, box_alpha=0.8, dataset=dataset, show_class=True ) cfg_yaml = envu.yaml_dump(cfg) if ind_range is not None: det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range) else: det_name = 'detections.pkl' det_file = os.path.join(output_dir, det_name) save_object( dict( all_boxes=all_boxes, all_segms=all_segms, all_keyps=all_keyps, cfg=cfg_yaml ), det_file ) logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file))) return all_boxes, all_segms, 
def initialize_model_from_cfg(weights_file, gpu_id=0):
    """Build an inference model from the global cfg and load its weights.

    The model is created with train=False, its parameters are initialized
    from `weights_file` for GPU `gpu_id`, and every network the model uses
    at test time is created in the Caffe2 workspace.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)

    # The main net and the conv body are always created; the mask and
    # keypoint nets only when the matching cfg switch is enabled.
    nets_to_create = [model.net, model.conv_body_net]
    if cfg.MODEL.MASK_ON:
        nets_to_create.append(model.mask_net)
    if cfg.MODEL.KEYPOINTS_ON:
        nets_to_create.append(model.keypoint_net)
    for net in nets_to_create:
        workspace.CreateNet(net)
    return model


def get_roidb_and_dataset(dataset_name, proposal_file, ind_range):
    """Load the roidb of `dataset_name`, optionally sliced to `ind_range`.

    Returns the tuple (roidb, dataset, start, end, total_num_images), where
    total_num_images is the roidb length *before* slicing and (start, end)
    is either `ind_range` or the full range (0, total_num_images).
    """
    dataset = JsonDataset(dataset_name)
    if not cfg.TEST.PRECOMPUTED_PROPOSALS:
        roidb = dataset.get_roidb()
    else:
        assert proposal_file, 'No proposal file given'
        roidb = dataset.get_roidb(
            proposal_file=proposal_file,
            proposal_limit=cfg.TEST.PROPOSAL_LIMIT
        )

    total_num_images = len(roidb)
    if ind_range is None:
        start, end = 0, total_num_images
    else:
        start, end = ind_range
        roidb = roidb[start:end]
    return roidb, dataset, start, end, total_num_images


def empty_results(num_classes, num_images):
    """Return empty results lists for boxes, masks, and keypoints.

    Box detections are collected into:
      all_boxes[cls][image] = N x 5 array with columns (x1, y1, x2, y2, score)
    Instance mask predictions are collected into:
      all_segms[cls][image] = [...] list of COCO RLE encoded masks that are in
      1:1 correspondence with the boxes in all_boxes[cls][image]
    Keypoint predictions are collected into:
      all_keyps[cls][image] = [...] list of keypoints results, each encoded as
      a 3D array (#rois, 4, #keypoints) with the 4 rows corresponding to
      [x, y, logit, prob] (See: utils.keypoints.heatmaps_to_keypoints).
      Keypoints are recorded for person (cls = 1); they are in 1:1
      correspondence with the boxes in all_boxes[cls][image].
    """
    def _fresh_table():
        # Every cell must be its own list object: `[[]] * N` would instead
        # produce N references to the *same* empty list.
        return [
            [[] for _ in range(num_images)] for _ in range(num_classes)
        ]

    return _fresh_table(), _fresh_table(), _fresh_table()


def extend_results(index, all_res, im_res):
    """Store the per-class results of one image at position `index`.

    `im_res[cls]` is written to `all_res[cls][index]` for every class except
    class 0 (__background__), which is left untouched.
    """
    for cls_idx, cls_res in enumerate(im_res):
        if cls_idx == 0:
            # Skip __background__
            continue
        all_res[cls_idx][index] = cls_res
def _create_cell_anchors():
    """Generate the cell anchors for every FPN level/scale/aspect ratio.

    Returns a dict mapping each FPN level in
    [cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL] to an (A, 4) array with
    A = SCALES_PER_OCTAVE * len(ASPECT_RATIOS), one row per
    (octave scale, aspect ratio) combination.

    The result depends only on cfg; im_detect_bbox recomputes it per image.
    """
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE
    A = scales_per_octave * len(aspect_ratios)
    anchors = {}
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = np.zeros((A, 4))
        a = 0
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for aspect in aspect_ratios:
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect, )
                cell_anchors[a, :] = generate_anchors(
                    stride=stride, sizes=anchor_sizes,
                    aspect_ratios=anchor_aspect_ratios)
                a += 1
        anchors[lvl] = cell_anchors
    return anchors


def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image.

    Args:
        model: model whose `net` is run in the Caffe2 workspace.
        im: input image array; `im.shape` is used to clip predicted boxes.
        timers: optional mapping of name -> Timer; the 'im_detect_bbox' and
            'misc_bbox' timers are updated. A fresh defaultdict(Timer) is
            used when None.

    Returns:
        cls_boxes: list of length cfg.MODEL.NUM_CLASSES. Entry 0
        (background) is an empty list; entry c >= 1 is an (N, 5) array with
        columns (x1, y1, x2, y2, score). When nothing is detected at any
        level, every foreground entry is an empty (0, 5) array.
    """
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    levels = range(k_min, k_max + 1)
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    cls_probs, box_preds = [], []
    for lvl in levels:
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    # The fetched blobs are in the same order as `levels`.
    for lvl, cls_prob, box_pred in zip(levels, cls_probs, box_preds):
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # Expose the anchor axis: (N, A * C, H, W) -> (N, A, C, H, W)
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, cls_prob.shape[1] // A,
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))

        if cfg.RETINANET.SOFTMAX:
            # Drop the first class channel so class indices line up with the
            # `cls - 1` lookup below.
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible
        # that candidate_ind is empty if we impose threshold 0.05 at all
        # levels. Hence, for the coarsest level, which has small spatial
        # resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        # Keep only the top-scoring candidates of this level before NMS.
        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        # Shift the cell anchors to the selected feature-map locations.
        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        # Map back to the coordinates of the original (unscaled) image.
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        if cfg.TEST.SOFT_NMS.ENABLED:
            cls_dets, keep = box_utils.soft_nms(
                cls_dets,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    if detections:
        detections = np.vstack(detections)
    else:
        # np.vstack raises ValueError on an empty list; when no level
        # produced a candidate, fall back to an empty detection table so the
        # caller still receives well-formed (0, 5) per-class arrays.
        detections = np.zeros((0, 6))
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% Evaluate detections for a single class and save the results.
%
%   cls        - class name; used in result file names and log output
%   VOCopts    - VOC options struct (fields read here: testset, dataset,
%                datadir, detrespath)
%   comp_id    - competition id passed to VOCevaldet
%   output_dir - directory that receives <cls>_pr.jpg and <cls>_pr.mat
%
% Returns a struct with fields recall, prec, ap and ap_auc. When evaluation
% is skipped, recall/prec are empty and ap/ap_auc are 0.

test_set = VOCopts.testset;
% Characters 4..end of the dataset name are parsed as the year below
year = VOCopts.dataset(4:end);

% VOCcode is put on the path for the duration of this call only
addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE: res_fn is computed but not used further in this function
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% Evaluate unless this is the 'test' set of a year after 2007
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  % Save the current figure as the precision/recall plot
  print(gcf, '-djpeg', '-r0', ...
        [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

save([output_dir '/' cls '_pr.mat'], ...
     'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));


function ap = xVOCap(rec,prec)
% Compute average precision as the area under the precision/recall curve.
% From the PASCAL VOC 2011 devkit
%
%   rec, prec - recall and precision values; they are concatenated
%               vertically with sentinels, so column vectors are expected

% Pad with sentinel points at recall 0 and 1
mrec=[0 ; rec ; 1];
mpre=[0 ; prec ; 0];
% Make precision monotonically non-increasing (running max from the right)
for i=numel(mpre)-1:-1:1
    mpre(i)=max(mpre(i),mpre(i+1));
end
% Indices where recall changes; sum the rectangle areas at those steps
i=find(mrec(2:end)~=mrec(1:end-1))+1;
ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Write mask predictions in Cityscapes format and run the official eval.

    For every image in the dataset's roidb this writes
    `<output_dir>/<basename>pred.txt`, one line per predicted instance
    ("<png path> <cityscapes label id> <score>"), plus one binary mask PNG
    per instance under `<output_dir>/results/`. The Cityscapes
    instance-level evaluation script is then invoked.

    Args:
        json_dataset: dataset providing `name`, `classes` and `get_roidb()`.
        all_boxes: all_boxes[cls][image] = N x 5 array; the last column is
            read as the score.
        all_segms: all_segms[cls][image] = list of RLE masks, decoded with
            pycocotools and matched to the boxes by position.
        output_dir: destination directory; replaced by '/tmp' when
            cfg.CLUSTER.ON_CLUSTER is set.
        use_salt: accepted for interface compatibility; not used here.
        cleanup: accepted for interface compatibility; not used here.

    Returns:
        None.
    """
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'

    results_dir = os.path.join(output_dir, 'results')
    if not os.path.isdir(results_dir):
        # makedirs (rather than mkdir) also creates output_dir itself when
        # it does not exist yet.
        os.makedirs(results_dir)

    os.environ['CITYSCAPES_DATASET'] = get_raw_dir(json_dataset.name)
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            # Class 0 is __background__ and carries no predictions
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if len(segms) == 0:
                    continue
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    # Path is relative to output_dir, as written to the txt
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None
def cityscapes_to_coco(cityscapes_id):
    """Map a Cityscapes class id (0-8) to its COCO class id.

    Returns -1 for 'rider', which has no COCO counterpart (-1 means the
    class is randomly initialized). Raises KeyError for unknown ids.
    """
    coco_ids = (
        0,   # ... background
        2,   # bicycle
        3,   # car
        1,   # person
        7,   # train
        8,   # truck
        4,   # motorcycle
        6,   # bus
        -1,  # rider (-1 means rand init)
    )
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_with_rider(cityscapes_id):
    """Map a Cityscapes class id (0-8) to COCO, treating 'rider' as 'person'.

    Raises KeyError for unknown ids.
    """
    coco_ids = (
        0,  # ... background
        2,  # bicycle
        3,  # car
        1,  # person
        7,  # train
        8,  # truck
        4,  # motorcycle
        6,  # bus
        1,  # rider (mapped to COCO "person")
    )
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_without_person_rider(cityscapes_id):
    """Map a Cityscapes class id (0-8) to COCO, ignoring person and rider.

    'person' and 'rider' both map to -1. Raises KeyError for unknown ids.
    """
    coco_ids = (
        0,   # ... background
        2,   # bicycle
        3,   # car
        -1,  # person (ignore)
        7,   # train
        8,   # truck
        4,   # motorcycle
        6,   # bus
        -1,  # rider (ignore)
    )
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_all_random(cityscapes_id):
    """Map every Cityscapes class id (0-8), background included, to -1.

    Raises KeyError for unknown ids.
    """
    # background, bicycle, car, person, train, truck, motorcycle, bus, rider
    coco_ids = (-1,) * 9
    return dict(enumerate(coco_ids))[cityscapes_id]
|_ VOCdevkit<year> ``` Create symlinks for `VOC<year>`: ``` mkdir -p $DETECTRON/detectron/datasets/data/VOC<year> ln -s /path/to/VOC<year>/JPEGImages $DETECTRON/detectron/datasets/data/VOC<year>/JPEGImages ln -s /path/to/VOC<year>/json/annotations $DETECTRON/detectron/datasets/data/VOC<year>/annotations ln -s /path/to/VOC<year>/devkit $DETECTRON/detectron/datasets/data/VOC<year>/VOCdevkit<year> ``` ### PASCAL VOC Annotations in COCO Format We expect PASCAL VOC annotations converted to COCO json format, which are available for download [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip). ## Creating Symlinks for Cityscapes: We assume that your symlinked `detectron/datasets/data/cityscapes` directory has the following structure: ``` cityscapes |_ images | |_ <im-1-name>.jpg | |_ ... | |_ <im-N-name>.jpg |_ annotations | |_ instancesonly_gtFine_train.json | |_ ... |_ raw |_ gtFine |_ ... |_ README.md ``` Create symlinks for `cityscapes`: ``` mkdir -p $DETECTRON/detectron/datasets/data/cityscapes ln -s /path/to/cityscapes/images $DETECTRON/detectron/datasets/data/cityscapes/images ln -s /path/to/cityscapes/json/annotations $DETECTRON/detectron/datasets/data/cityscapes/annotations ln -s /path/to/cityscapes/root $DETECTRON/detectron/datasets/data/cityscapes/raw ``` ### Cityscapes Annotations in COCO Format We expect Cityscapes annotations converted to COCO json format, which we will make available for download soon. ================================================ FILE: detectron/datasets/dataset_catalog.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# Path to data dir
_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')

# Required dataset entry keys
_IM_DIR = 'image_directory'
_ANN_FN = 'annotation_file'

# Optional dataset entry keys
_IM_PREFIX = 'image_prefix'
_DEVKIT_DIR = 'devkit_directory'
_RAW_DIR = 'raw_dir'

# Available datasets
_DATASETS = {
    'cityscapes_fine_instanceonly_seg_train': {
        _IM_DIR:
            _DATA_DIR + '/cityscapes/images',
        _ANN_FN:
            _DATA_DIR + '/cityscapes/annotations/instancesonly_gtFine_train.json',
        _RAW_DIR:
            _DATA_DIR + '/cityscapes/raw'
    },
    'cityscapes_fine_instanceonly_seg_val': {
        _IM_DIR:
            _DATA_DIR + '/cityscapes/images',
        # use filtered validation as there is an issue converting contours
        _ANN_FN:
            _DATA_DIR + '/cityscapes/annotations/instancesonly_filtered_gtFine_val.json',
        _RAW_DIR:
            _DATA_DIR + '/cityscapes/raw'
    },
    'cityscapes_fine_instanceonly_seg_test': {
        _IM_DIR:
            _DATA_DIR + '/cityscapes/images',
        _ANN_FN:
            _DATA_DIR + '/cityscapes/annotations/instancesonly_gtFine_test.json',
        _RAW_DIR:
            _DATA_DIR + '/cityscapes/raw'
    },
    'coco_2014_train': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_train2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/instances_train2014.json'
    },
    'coco_2014_val': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/instances_val2014.json'
    },
    'coco_2014_minival': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/instances_minival2014.json'
    },
    'coco_2014_valminusminival': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/instances_valminusminival2014.json'
    },
    'coco_2015_test': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test2015.json'
    },
    'coco_2015_test-dev': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test-dev2015.json'
    },
    'coco_2017_test': {  # 2017 test uses 2015 test images
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test2017.json',
        _IM_PREFIX:
            'COCO_test2015_'
    },
    'coco_2017_test-dev': {  # 2017 test-dev uses 2015 test images
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test-dev2017.json',
        _IM_PREFIX:
            'COCO_test2015_'
    },
    'coco_stuff_train': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_train2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/coco_stuff_train.json'
    },
    'coco_stuff_val': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/coco_stuff_val.json'
    },
    'keypoints_coco_2014_train': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_train2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/person_keypoints_train2014.json'
    },
    'keypoints_coco_2014_val': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/person_keypoints_val2014.json'
    },
    'keypoints_coco_2014_minival': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/person_keypoints_minival2014.json'
    },
    'keypoints_coco_2014_valminusminival': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_val2014',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/person_keypoints_valminusminival2014.json'
    },
    'keypoints_coco_2015_test': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test2015.json'
    },
    'keypoints_coco_2015_test-dev': {
        _IM_DIR:
            _DATA_DIR + '/coco/coco_test2015',
        _ANN_FN:
            _DATA_DIR + '/coco/annotations/image_info_test-dev2015.json'
    },
    'voc_2007_train': {
        _IM_DIR:
            _DATA_DIR + '/VOC2007/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2007/annotations/voc_2007_train.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2007/VOCdevkit2007'
    },
    'voc_2007_val': {
        _IM_DIR:
            _DATA_DIR + '/VOC2007/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2007/annotations/voc_2007_val.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2007/VOCdevkit2007'
    },
    'voc_2007_test': {
        _IM_DIR:
            _DATA_DIR + '/VOC2007/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2007/annotations/voc_2007_test.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2007/VOCdevkit2007'
    },
    'voc_2012_train': {
        _IM_DIR:
            _DATA_DIR + '/VOC2012/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2012/annotations/voc_2012_train.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2012/VOCdevkit2012'
    },
    'voc_2012_val': {
        _IM_DIR:
            _DATA_DIR + '/VOC2012/JPEGImages',
        _ANN_FN:
            _DATA_DIR + '/VOC2012/annotations/voc_2012_val.json',
        _DEVKIT_DIR:
            _DATA_DIR + '/VOC2012/VOCdevkit2012'
    }
}


def datasets():
    """Retrieve the list of available dataset names."""
    return _DATASETS.keys()


def contains(name):
    """Determine if the dataset is in the catalog."""
    # Membership on the dict itself is a hash lookup on every Python
    # version; `.keys()` builds a list (and scans it) on Python 2.
    return name in _DATASETS


def get_im_dir(name):
    """Retrieve the image directory for the dataset.

    Raises KeyError if `name` is not in the catalog.
    """
    return _DATASETS[name][_IM_DIR]


def get_ann_fn(name):
    """Retrieve the annotation file for the dataset.

    Raises KeyError if `name` is not in the catalog.
    """
    return _DATASETS[name][_ANN_FN]


def get_im_prefix(name):
    """Retrieve the image prefix for the dataset ('' when none is set).

    Raises KeyError if `name` is not in the catalog.
    """
    return _DATASETS[name].get(_IM_PREFIX, '')


def get_devkit_dir(name):
    """Retrieve the devkit dir for the dataset.

    Raises KeyError if `name` is unknown or the entry has no devkit dir.
    """
    return _DATASETS[name][_DEVKIT_DIR]


def get_raw_dir(name):
    """Retrieve the raw dir for the dataset.

    Raises KeyError if `name` is unknown or the entry has no raw dir.
    """
    return _DATASETS[name][_RAW_DIR]
def get_coco_dataset():
    """Return a stub COCO dataset exposing only a `classes` mapping.

    `classes` maps each contiguous class index (0 = '__background__') to
    its class name.
    """
    class_names = (
        '__background__', 'person', 'bicycle', 'car', 'motorcycle',
        'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
        'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
        'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
        'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
        'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
        'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
        'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
        'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
        'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    )
    dummy = AttrDict()
    dummy.classes = dict(enumerate(class_names))
    return dummy
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import copy import logging import numpy as np import os import scipy.sparse # Must happen before importing COCO API (which imports matplotlib) import detectron.utils.env as envu envu.set_up_matplotlib() # COCO API from pycocotools import mask as COCOmask from pycocotools.coco import COCO from detectron.core.config import cfg from detectron.utils.timer import Timer import detectron.datasets.dataset_catalog as dataset_catalog import detectron.utils.boxes as box_utils from detectron.utils.io import load_object import detectron.utils.segms as segm_utils logger = logging.getLogger(__name__) class JsonDataset: """A class representing a COCO json dataset.""" def __init__(self, name): assert dataset_catalog.contains(name), \ 'Unknown dataset name: {}'.format(name) assert os.path.exists(dataset_catalog.get_im_dir(name)), \ 'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name)) assert os.path.exists(dataset_catalog.get_ann_fn(name)), \ 'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name)) logger.debug('Creating: {}'.format(name)) self.name = name self.image_directory = dataset_catalog.get_im_dir(name) self.image_prefix = dataset_catalog.get_im_prefix(name) self.COCO = COCO(dataset_catalog.get_ann_fn(name)) self.debug_timer = Timer() # Set up dataset classes category_ids = self.COCO.getCatIds() categories = [c['name'] for c in self.COCO.loadCats(category_ids)] self.category_to_id_map = dict(zip(categories, category_ids)) self.classes = ['__background__'] + categories self.num_classes = len(self.classes) self.json_category_id_to_contiguous_id = { v: i + 1 for i, v in enumerate(self.COCO.getCatIds()) } self.contiguous_category_id_to_json_id = { v: k for k, v in self.json_category_id_to_contiguous_id.items() } self._init_keypoints() def get_roidb( self, gt=False, proposal_file=None, min_proposal_size=2, 
proposal_limit=-1, crowd_filter_thresh=0 ): """Return an roidb corresponding to the json dataset. Optionally: - include ground truth boxes in the roidb - add proposals specified in a proposals file - filter proposals based on a minimum side length - filter proposals that intersect with crowd regions """ assert gt is True or crowd_filter_thresh == 0, \ 'Crowd filter threshold must be 0 if ground-truth annotations ' \ 'are not included.' image_ids = self.COCO.getImgIds() image_ids.sort() roidb = copy.deepcopy(self.COCO.loadImgs(image_ids)) for entry in roidb: self._prep_roidb_entry(entry) if gt: # Include ground-truth object annotations self.debug_timer.tic() for entry in roidb: self._add_gt_annotations(entry) logger.debug( '_add_gt_annotations took {:.3f}s'. format(self.debug_timer.toc(average=False)) ) if proposal_file is not None: # Include proposals from a file self.debug_timer.tic() self._add_proposals_from_file( roidb, proposal_file, min_proposal_size, proposal_limit, crowd_filter_thresh ) logger.debug( '_add_proposals_from_file took {:.3f}s'. format(self.debug_timer.toc(average=False)) ) _add_class_assignments(roidb) return roidb def _prep_roidb_entry(self, entry): """Adds empty metadata fields to an roidb entry.""" # Reference back to the parent dataset entry['dataset'] = self # Make file_name an abs path im_path = os.path.join( self.image_directory, self.image_prefix + entry['file_name'] ) assert os.path.exists(im_path), 'Image \'{}\' not found'.format(im_path) entry['image'] = im_path entry['flipped'] = False entry['has_visible_keypoints'] = False # Empty placeholders entry['boxes'] = np.empty((0, 4), dtype=np.float32) entry['segms'] = [] entry['gt_classes'] = np.empty((0), dtype=np.int32) entry['seg_areas'] = np.empty((0), dtype=np.float32) entry['gt_overlaps'] = scipy.sparse.csr_matrix( np.empty((0, self.num_classes), dtype=np.float32) ) entry['is_crowd'] = np.empty((0), dtype=bool) # 'box_to_gt_ind_map': Shape is (#rois). 
Maps from each roi to the index # in the list of rois that satisfy np.where(entry['gt_classes'] > 0) entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32) if self.keypoints is not None: entry['gt_keypoints'] = np.empty( (0, 3, self.num_keypoints), dtype=np.int32 ) # Remove unwanted fields that come from the json file (if they exist) for k in ['date_captured', 'url', 'license', 'file_name']: if k in entry: del entry[k] def _add_gt_annotations(self, entry): """Add ground truth annotation metadata to an roidb entry.""" ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None) objs = self.COCO.loadAnns(ann_ids) # Sanitize bboxes -- some are invalid valid_objs = [] valid_segms = [] width = entry['width'] height = entry['height'] for obj in objs: # crowd regions are RLE encoded if segm_utils.is_poly(obj['segmentation']): # Valid polygons have >= 3 points, so require >= 6 coordinates obj['segmentation'] = [ p for p in obj['segmentation'] if len(p) >= 6 ] if obj['area'] < cfg.TRAIN.GT_MIN_AREA: continue if 'ignore' in obj and obj['ignore'] == 1: continue # Convert form (x1, y1, w, h) to (x1, y1, x2, y2) x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox']) x1, y1, x2, y2 = box_utils.clip_xyxy_to_image( x1, y1, x2, y2, height, width ) # Require non-zero seg area and more than 1x1 box size if obj['area'] > 0 and x2 > x1 and y2 > y1: obj['clean_bbox'] = [x1, y1, x2, y2] valid_objs.append(obj) valid_segms.append(obj['segmentation']) num_valid_objs = len(valid_objs) boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype) gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype) gt_overlaps = np.zeros( (num_valid_objs, self.num_classes), dtype=entry['gt_overlaps'].dtype ) seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype) is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype) box_to_gt_ind_map = np.zeros( (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype ) if self.keypoints is not None: gt_keypoints = 
np.zeros( (num_valid_objs, 3, self.num_keypoints), dtype=entry['gt_keypoints'].dtype ) im_has_visible_keypoints = False for ix, obj in enumerate(valid_objs): cls = self.json_category_id_to_contiguous_id[obj['category_id']] boxes[ix, :] = obj['clean_bbox'] gt_classes[ix] = cls seg_areas[ix] = obj['area'] is_crowd[ix] = obj['iscrowd'] box_to_gt_ind_map[ix] = ix if self.keypoints is not None: gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj) if np.sum(gt_keypoints[ix, 2, :]) > 0: im_has_visible_keypoints = True if obj['iscrowd']: # Set overlap to -1 for all classes for crowd objects # so they will be excluded during training gt_overlaps[ix, :] = -1.0 else: gt_overlaps[ix, cls] = 1.0 entry['boxes'] = np.append(entry['boxes'], boxes, axis=0) entry['segms'].extend(valid_segms) # To match the original implementation: # entry['boxes'] = np.append( # entry['boxes'], boxes.astype(int).astype(float), axis=0) entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes) entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas) entry['gt_overlaps'] = np.append( entry['gt_overlaps'].toarray(), gt_overlaps, axis=0 ) entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps']) entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd) entry['box_to_gt_ind_map'] = np.append( entry['box_to_gt_ind_map'], box_to_gt_ind_map ) if self.keypoints is not None: entry['gt_keypoints'] = np.append( entry['gt_keypoints'], gt_keypoints, axis=0 ) entry['has_visible_keypoints'] = im_has_visible_keypoints def _add_proposals_from_file( self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh ): """Add proposals from a proposals file to an roidb.""" logger.info('Loading proposals from: {}'.format(proposal_file)) proposals = load_object(proposal_file) id_field = 'indexes' if 'indexes' in proposals else 'ids' # compat fix _remove_proposals_not_in_roidb(proposals, roidb, id_field) _sort_proposals(proposals, id_field) box_list = [] for i, entry in enumerate(roidb): if i % 2500 
== 0: logger.info(' {:d}/{:d}'.format(i + 1, len(roidb))) boxes = proposals['boxes'][i] # Sanity check that these boxes are for the correct image id assert entry['id'] == proposals[id_field][i] # Remove duplicate boxes and very small boxes and then take top k boxes = box_utils.clip_boxes_to_image( boxes, entry['height'], entry['width'] ) keep = box_utils.unique_boxes(boxes) boxes = boxes[keep, :] keep = box_utils.filter_small_boxes(boxes, min_proposal_size) boxes = boxes[keep, :] if top_k > 0: boxes = boxes[:top_k, :] box_list.append(boxes) _merge_proposal_boxes_into_roidb(roidb, box_list) if crowd_thresh > 0: _filter_crowd_proposals(roidb, crowd_thresh) def _init_keypoints(self): """Initialize COCO keypoint information.""" self.keypoints = None self.keypoint_flip_map = None self.keypoints_to_id_map = None self.num_keypoints = 0 # Thus far only the 'person' category has keypoints if 'person' in self.category_to_id_map: cat_info = self.COCO.loadCats([self.category_to_id_map['person']]) else: return # Check if the annotations contain keypoint data or not if 'keypoints' in cat_info[0]: keypoints = cat_info[0]['keypoints'] self.keypoints_to_id_map = dict( zip(keypoints, range(len(keypoints)))) self.keypoints = keypoints self.num_keypoints = len(keypoints) self.keypoint_flip_map = { 'left_eye': 'right_eye', 'left_ear': 'right_ear', 'left_shoulder': 'right_shoulder', 'left_elbow': 'right_elbow', 'left_wrist': 'right_wrist', 'left_hip': 'right_hip', 'left_knee': 'right_knee', 'left_ankle': 'right_ankle'} def _get_gt_keypoints(self, obj): """Return ground truth keypoints.""" if 'keypoints' not in obj: return None kp = np.array(obj['keypoints']) x = kp[0::3] # 0-indexed x coordinates y = kp[1::3] # 0-indexed y coordinates # 0: not labeled; 1: labeled, not inside mask; # 2: labeled and inside mask v = kp[2::3] num_keypoints = len(obj['keypoints']) / 3 assert num_keypoints == self.num_keypoints gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32) for i in 
range(self.num_keypoints): gt_kps[0, i] = x[i] gt_kps[1, i] = y[i] gt_kps[2, i] = v[i] return gt_kps def add_proposals(roidb, rois, scales, crowd_thresh): """Add proposal boxes (rois) to an roidb that has ground-truth annotations but no proposals. If the proposals are not at the original image scale, specify the scale factor that separate them in scales. """ box_list = [] for i in range(len(roidb)): inv_im_scale = 1. / scales[i] idx = np.where(rois[:, 0] == i)[0] box_list.append(rois[idx, 1:] * inv_im_scale) _merge_proposal_boxes_into_roidb(roidb, box_list) if crowd_thresh > 0: _filter_crowd_proposals(roidb, crowd_thresh) _add_class_assignments(roidb) def _merge_proposal_boxes_into_roidb(roidb, box_list): """Add proposal boxes to each roidb entry.""" assert len(box_list) == len(roidb) for i, entry in enumerate(roidb): boxes = box_list[i] num_boxes = boxes.shape[0] gt_overlaps = np.zeros( (num_boxes, entry['gt_overlaps'].shape[1]), dtype=entry['gt_overlaps'].dtype ) box_to_gt_ind_map = -np.ones( (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype ) # Note: unlike in other places, here we intentionally include all gt # rois, even ones marked as crowd. Boxes that overlap with crowds will # be filtered out later (see: _filter_crowd_proposals). 
gt_inds = np.where(entry['gt_classes'] > 0)[0] if len(gt_inds) > 0: gt_boxes = entry['boxes'][gt_inds, :] gt_classes = entry['gt_classes'][gt_inds] proposal_to_gt_overlaps = box_utils.bbox_overlaps( boxes.astype(dtype=np.float32, copy=False), gt_boxes.astype(dtype=np.float32, copy=False) ) # Gt box that overlaps each input box the most # (ties are broken arbitrarily by class order) argmaxes = proposal_to_gt_overlaps.argmax(axis=1) # Amount of that overlap maxes = proposal_to_gt_overlaps.max(axis=1) # Those boxes with non-zero overlap with gt boxes I = np.where(maxes > 0)[0] # Record max overlaps with the class of the appropriate gt box gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] box_to_gt_ind_map[I] = gt_inds[argmaxes[I]] entry['boxes'] = np.append( entry['boxes'], boxes.astype(entry['boxes'].dtype, copy=False), axis=0 ) entry['gt_classes'] = np.append( entry['gt_classes'], np.zeros((num_boxes), dtype=entry['gt_classes'].dtype) ) entry['seg_areas'] = np.append( entry['seg_areas'], np.zeros((num_boxes), dtype=entry['seg_areas'].dtype) ) entry['gt_overlaps'] = np.append( entry['gt_overlaps'].toarray(), gt_overlaps, axis=0 ) entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps']) entry['is_crowd'] = np.append( entry['is_crowd'], np.zeros((num_boxes), dtype=entry['is_crowd'].dtype) ) entry['box_to_gt_ind_map'] = np.append( entry['box_to_gt_ind_map'], box_to_gt_ind_map.astype( entry['box_to_gt_ind_map'].dtype, copy=False ) ) def _filter_crowd_proposals(roidb, crowd_thresh): """Finds proposals that are inside crowd regions and marks them as overlap = -1 with each ground-truth rois, which means they will be excluded from training. 
""" for entry in roidb: gt_overlaps = entry['gt_overlaps'].toarray() crowd_inds = np.where(entry['is_crowd'] == 1)[0] non_gt_inds = np.where(entry['gt_classes'] == 0)[0] if len(crowd_inds) == 0 or len(non_gt_inds) == 0: continue crowd_boxes = box_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :]) non_gt_boxes = box_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :]) iscrowd_flags = [int(True)] * len(crowd_inds) ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd_flags) bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0] gt_overlaps[non_gt_inds[bad_inds], :] = -1 entry['gt_overlaps'] = scipy.sparse.csr_matrix(gt_overlaps) def _add_class_assignments(roidb): """Compute object category assignment for each box associated with each roidb entry. """ for entry in roidb: gt_overlaps = entry['gt_overlaps'].toarray() # max overlap with gt over classes (columns) max_overlaps = gt_overlaps.max(axis=1) # gt class that had the max overlap max_classes = gt_overlaps.argmax(axis=1) entry['max_classes'] = max_classes entry['max_overlaps'] = max_overlaps # sanity checks # if max overlap is 0, the class must be background (class 0) zero_inds = np.where(max_overlaps == 0)[0] assert all(max_classes[zero_inds] == 0) # if max overlap > 0, the class must be a fg class (not class 0) nonzero_inds = np.where(max_overlaps > 0)[0] assert all(max_classes[nonzero_inds] != 0) def _sort_proposals(proposals, id_field): """Sort proposals by the specified id field.""" order = np.argsort(proposals[id_field]) fields_to_sort = ['boxes', id_field, 'scores'] for k in fields_to_sort: proposals[k] = [proposals[k][i] for i in order] def _remove_proposals_not_in_roidb(proposals, roidb, id_field): # fix proposals so they don't contain entries for images not in the roidb roidb_ids = set({entry["id"] for entry in roidb}) keep = [i for i, id in enumerate(proposals[id_field]) if id in roidb_ids] for f in ['boxes', id_field, 'scores']: proposals[f] = [proposals[f][i] for i in keep] 
================================================ FILE: detectron/datasets/json_dataset_evaluator.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Functions for evaluating results computed for a json dataset.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import json import logging import numpy as np import os import six import uuid from pycocotools.cocoeval import COCOeval from detectron.core.config import cfg from detectron.utils.io import save_object import detectron.utils.boxes as box_utils logger = logging.getLogger(__name__) def evaluate_masks( json_dataset, all_boxes, all_segms, output_dir, use_salt=True, cleanup=False ): res_file = os.path.join( output_dir, 'segmentations_' + json_dataset.name + '_results' ) if use_salt: res_file += '_{}'.format(str(uuid.uuid4())) res_file += '.json' _write_coco_segms_results_file( json_dataset, all_boxes, all_segms, res_file) # Only do evaluation on non-test sets (annotations are undisclosed on test) if json_dataset.name.find('test') == -1: coco_eval = _do_segmentation_eval(json_dataset, res_file, output_dir) else: logger.warning( '{} eval ignored as annotations are undisclosed on test: {} ignored' .format("Segmentation", json_dataset.name) ) coco_eval = None # 
Optionally cleanup results json file if cleanup: os.remove(res_file) return coco_eval def _write_coco_segms_results_file( json_dataset, all_boxes, all_segms, res_file ): # [{"image_id": 42, # "category_id": 18, # "segmentation": [...], # "score": 0.236}, ...] results = [] for cls_ind, cls in enumerate(json_dataset.classes): if cls == '__background__': continue if cls_ind >= len(all_boxes): break cat_id = json_dataset.category_to_id_map[cls] results.extend(_coco_segms_results_one_category( json_dataset, all_boxes[cls_ind], all_segms[cls_ind], cat_id)) logger.info( 'Writing segmentation results json to: {}'.format( os.path.abspath(res_file))) with open(res_file, 'w') as fid: # "counts" is an array encoded by mask_util as a byte-stream. Python3's # json writer which /always produces strings/ cannot serialize a bytestream # unless you decode it. Thankfully, utf-8 works out (which is also what # the pycocotools/_mask.pyx does. if six.PY3: for r in results: rle = r['segmentation'] if 'counts' in rle: rle['counts'] = rle['counts'].decode("utf8") json.dump(results, fid) def _coco_segms_results_one_category(json_dataset, boxes, segms, cat_id): results = [] image_ids = json_dataset.COCO.getImgIds() image_ids.sort() assert len(boxes) == len(image_ids) assert len(segms) == len(image_ids) for i, image_id in enumerate(image_ids): dets = boxes[i] rles = segms[i] if isinstance(dets, list) and len(dets) == 0: continue dets = dets.astype(float) scores = dets[:, -1] results.extend( [{'image_id': image_id, 'category_id': cat_id, 'segmentation': rles[k], 'score': scores[k]} for k in range(dets.shape[0])]) return results def _do_segmentation_eval(json_dataset, res_file, output_dir): coco_dt = json_dataset.COCO.loadRes(str(res_file)) coco_eval = COCOeval(json_dataset.COCO, coco_dt, 'segm') coco_eval.evaluate() coco_eval.accumulate() _log_detection_eval_metrics(json_dataset, coco_eval) eval_file = os.path.join(output_dir, 'segmentation_results.pkl') save_object(coco_eval, eval_file) 
logger.info('Wrote json eval results to: {}'.format(eval_file)) return coco_eval def evaluate_boxes( json_dataset, all_boxes, output_dir, use_salt=True, cleanup=False ): res_file = os.path.join( output_dir, 'bbox_' + json_dataset.name + '_results' ) if use_salt: res_file += '_{}'.format(str(uuid.uuid4())) res_file += '.json' _write_coco_bbox_results_file(json_dataset, all_boxes, res_file) # Only do evaluation on non-test sets (annotations are undisclosed on test) if json_dataset.name.find('test') == -1: coco_eval = _do_detection_eval(json_dataset, res_file, output_dir) else: logger.warning( '{} eval ignored as annotations are undisclosed on test: {} ignored' .format("Bbox", json_dataset.name) ) coco_eval = None # Optionally cleanup results json file if cleanup: os.remove(res_file) return coco_eval def _write_coco_bbox_results_file(json_dataset, all_boxes, res_file): # [{"image_id": 42, # "category_id": 18, # "bbox": [258.15,41.29,348.26,243.78], # "score": 0.236}, ...] results = [] for cls_ind, cls in enumerate(json_dataset.classes): if cls == '__background__': continue if cls_ind >= len(all_boxes): break cat_id = json_dataset.category_to_id_map[cls] results.extend(_coco_bbox_results_one_category( json_dataset, all_boxes[cls_ind], cat_id)) logger.info( 'Writing bbox results json to: {}'.format(os.path.abspath(res_file))) with open(res_file, 'w') as fid: json.dump(results, fid) def _coco_bbox_results_one_category(json_dataset, boxes, cat_id): results = [] image_ids = json_dataset.COCO.getImgIds() image_ids.sort() assert len(boxes) == len(image_ids) for i, image_id in enumerate(image_ids): dets = boxes[i] if isinstance(dets, list) and len(dets) == 0: continue dets = dets.astype(float) scores = dets[:, -1] xywh_dets = box_utils.xyxy_to_xywh(dets[:, 0:4]) xs = xywh_dets[:, 0] ys = xywh_dets[:, 1] ws = xywh_dets[:, 2] hs = xywh_dets[:, 3] results.extend( [{'image_id': image_id, 'category_id': cat_id, 'bbox': [xs[k], ys[k], ws[k], hs[k]], 'score': scores[k]} for k in 
range(dets.shape[0])]) return results def _do_detection_eval(json_dataset, res_file, output_dir): coco_dt = json_dataset.COCO.loadRes(str(res_file)) coco_eval = COCOeval(json_dataset.COCO, coco_dt, 'bbox') coco_eval.evaluate() coco_eval.accumulate() _log_detection_eval_metrics(json_dataset, coco_eval) eval_file = os.path.join(output_dir, 'detection_results.pkl') save_object(coco_eval, eval_file) logger.info('Wrote json eval results to: {}'.format(eval_file)) return coco_eval def _log_detection_eval_metrics(json_dataset, coco_eval): def _get_thr_ind(coco_eval, thr): ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) & (coco_eval.params.iouThrs < thr + 1e-5))[0][0] iou_thr = coco_eval.params.iouThrs[ind] assert np.isclose(iou_thr, thr) return ind IoU_lo_thresh = 0.5 IoU_hi_thresh = 0.95 ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh) ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh) # precision has dims (iou, recall, cls, area range, max dets) # area range index 0: all area ranges # max dets index 2: 100 per image precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2] ap_default = np.mean(precision[precision > -1]) logger.info( '~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ~~~~'.format( IoU_lo_thresh, IoU_hi_thresh)) logger.info('{:.1f}'.format(100 * ap_default)) for cls_ind, cls in enumerate(json_dataset.classes): if cls == '__background__': continue # minus 1 because of __background__ precision = coco_eval.eval['precision'][ ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2] ap = np.mean(precision[precision > -1]) logger.info('{:.1f}'.format(100 * ap)) logger.info('~~~~ Summary metrics ~~~~') coco_eval.summarize() def evaluate_box_proposals( json_dataset, roidb, thresholds=None, area='all', limit=None, class_specific=False ): """Evaluate detection proposal recall metrics. This function is a much faster alternative to the official COCO API recall evaluation code. However, it produces slightly different results. 
""" # Record max overlap value for each gt box # Return vector of overlap values areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3, '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} area_ranges = [ [0**2, 1e5**2], # all [0**2, 32**2], # small [32**2, 96**2], # medium [96**2, 1e5**2], # large [96**2, 128**2], # 96-128 [128**2, 256**2], # 128-256 [256**2, 512**2], # 256-512 [512**2, 1e5**2]] # 512-inf assert area in areas, 'Unknown area range: {}'.format(area) area_range = area_ranges[areas[area]] gt_overlaps = np.zeros(0) gt_classes = np.zeros(0) num_pos = 0 for entry in roidb: gt_inds = np.where( (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] gt_boxes = entry['boxes'][gt_inds, :] gt_areas = entry['seg_areas'][gt_inds] valid_gt_inds = np.where( (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0] gt_boxes = gt_boxes[valid_gt_inds, :] _gt_classes = entry["gt_classes"][valid_gt_inds] assert gt_boxes.shape[0] == _gt_classes.shape[0] gt_classes = np.hstack((gt_classes, _gt_classes)) num_pos += len(valid_gt_inds) non_gt_inds = np.where(entry['gt_classes'] == 0)[0] boxes = entry['boxes'][non_gt_inds, :] if boxes.shape[0] == 0: continue if limit is not None and boxes.shape[0] > limit: boxes = boxes[:limit, :] overlaps = box_utils.bbox_overlaps( boxes.astype(dtype=np.float32, copy=False), gt_boxes.astype(dtype=np.float32, copy=False)) _gt_overlaps = np.zeros((gt_boxes.shape[0])) for j in range(min(boxes.shape[0], gt_boxes.shape[0])): # find which proposal box maximally covers each gt box argmax_overlaps = overlaps.argmax(axis=0) # and get the iou amount of coverage for each gt box max_overlaps = overlaps.max(axis=0) # find which gt box is 'best' covered (i.e. 
'best' = most iou) gt_ind = max_overlaps.argmax() gt_ovr = max_overlaps.max() assert gt_ovr >= 0 # find the proposal box that covers the best covered gt box box_ind = argmax_overlaps[gt_ind] # record the iou coverage of this gt box _gt_overlaps[j] = overlaps[box_ind, gt_ind] assert _gt_overlaps[j] == gt_ovr # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 # append recorded iou coverage level gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) if thresholds is None: step = 0.05 thresholds = np.arange(0.5, 0.95 + 1e-5, step) if not class_specific: gt_overlaps = np.sort(gt_overlaps) recalls = np.zeros_like(thresholds) # compute recall for each iou threshold for i, t in enumerate(thresholds): recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) ar = recalls.mean() return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 'gt_overlaps': gt_overlaps, 'num_pos': num_pos} else: gt_classes_unique = np.unique(gt_classes) recalls = np.zeros((gt_classes_unique.shape[0], thresholds.shape[0])) # compute recall for each category and each iou threshold for i, category_id in enumerate(gt_classes_unique): inds = (gt_classes == category_id) num_pos_per_category = float(inds.sum()) for j, thresh in enumerate(thresholds): recalls[i][j] = ( gt_overlaps[inds] >= thresh ).sum() / num_pos_per_category ar = recalls.mean(axis=1).mean() return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 'gt_overlaps': gt_overlaps, 'num_pos': num_pos} def evaluate_keypoints( json_dataset, all_boxes, all_keypoints, output_dir, use_salt=True, cleanup=False ): res_file = os.path.join( output_dir, 'keypoints_' + json_dataset.name + '_results' ) if use_salt: res_file += '_{}'.format(str(uuid.uuid4())) res_file += '.json' _write_coco_keypoint_results_file( json_dataset, all_boxes, all_keypoints, res_file) # Only do evaluation on non-test sets (annotations are undisclosed on test) if json_dataset.name.find('test') == -1: coco_eval = 
_do_keypoint_eval(json_dataset, res_file, output_dir) else: logger.warning( '{} eval ignored as annotations are undisclosed on test: {} ignored' .format("Keypoints", json_dataset.name) ) coco_eval = None # Optionally cleanup results json file if cleanup: os.remove(res_file) return coco_eval def _write_coco_keypoint_results_file( json_dataset, all_boxes, all_keypoints, res_file ): results = [] for cls_ind, cls in enumerate(json_dataset.classes): if cls == '__background__': continue if cls_ind >= len(all_keypoints): break logger.info( 'Collecting {} results ({:d}/{:d})'.format( cls, cls_ind, len(all_keypoints) - 1)) cat_id = json_dataset.category_to_id_map[cls] results.extend(_coco_kp_results_one_category( json_dataset, all_boxes[cls_ind], all_keypoints[cls_ind], cat_id)) logger.info( 'Writing keypoint results json to: {}'.format( os.path.abspath(res_file))) with open(res_file, 'w') as fid: json.dump(results, fid) def _coco_kp_results_one_category(json_dataset, boxes, kps, cat_id): results = [] image_ids = json_dataset.COCO.getImgIds() image_ids.sort() assert len(kps) == len(image_ids) assert len(boxes) == len(image_ids) use_box_score = False if cfg.KRCNN.KEYPOINT_CONFIDENCE == 'logit': # This is ugly; see utils.keypoints.heatmap_to_keypoints for the magic # indexes score_index = 2 elif cfg.KRCNN.KEYPOINT_CONFIDENCE == 'prob': score_index = 3 elif cfg.KRCNN.KEYPOINT_CONFIDENCE == 'bbox': use_box_score = True else: raise ValueError( 'KRCNN.KEYPOINT_CONFIDENCE must be "logit", "prob", or "bbox"') for i, image_id in enumerate(image_ids): if len(boxes[i]) == 0: continue kps_dets = kps[i] scores = boxes[i][:, -1].astype(float) if len(kps_dets) == 0: continue for j in range(len(kps_dets)): xy = [] kps_score = 0 for k in range(kps_dets[j].shape[1]): xy.append(float(kps_dets[j][0, k])) xy.append(float(kps_dets[j][1, k])) xy.append(1) if not use_box_score: kps_score += kps_dets[j][score_index, k] if use_box_score: kps_score = scores[j] else: kps_score /= kps_dets[j].shape[1] 
results.extend([{'image_id': image_id, 'category_id': cat_id, 'keypoints': xy, 'score': kps_score}]) return results def _do_keypoint_eval(json_dataset, res_file, output_dir): ann_type = 'keypoints' imgIds = json_dataset.COCO.getImgIds() imgIds.sort() coco_dt = json_dataset.COCO.loadRes(res_file) coco_eval = COCOeval(json_dataset.COCO, coco_dt, ann_type) coco_eval.params.imgIds = imgIds coco_eval.evaluate() coco_eval.accumulate() eval_file = os.path.join(output_dir, 'keypoint_results.pkl') save_object(coco_eval, eval_file) logger.info('Wrote json eval results to: {}'.format(eval_file)) coco_eval.summarize() return coco_eval ================================================ FILE: detectron/datasets/roidb.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## """Functions for common roidb manipulations.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from past.builtins import basestring import logging import numpy as np from detectron.core.config import cfg from detectron.datasets.json_dataset import JsonDataset import detectron.utils.boxes as box_utils import detectron.utils.keypoints as keypoint_utils import detectron.utils.segms as segm_utils logger = logging.getLogger(__name__) def combined_roidb_for_training(dataset_names, proposal_files): """Load and concatenate roidbs for one or more datasets, along with optional object proposals. The roidb entries are then prepared for use in training, which involves caching certain types of metadata for each roidb entry. """ def get_roidb(dataset_name, proposal_file): ds = JsonDataset(dataset_name) roidb = ds.get_roidb( gt=True, proposal_file=proposal_file, crowd_filter_thresh=cfg.TRAIN.CROWD_FILTER_THRESH ) if cfg.TRAIN.USE_FLIPPED: logger.info('Appending horizontally-flipped training examples...') extend_with_flipped_entries(roidb, ds) logger.info('Loaded dataset: {:s}'.format(ds.name)) return roidb if isinstance(dataset_names, basestring): dataset_names = (dataset_names, ) if isinstance(proposal_files, basestring): proposal_files = (proposal_files, ) if len(proposal_files) == 0: proposal_files = (None, ) * len(dataset_names) assert len(dataset_names) == len(proposal_files) roidbs = [get_roidb(*args) for args in zip(dataset_names, proposal_files)] roidb = roidbs[0] for r in roidbs[1:]: roidb.extend(r) roidb = filter_for_training(roidb) logger.info('Computing bounding-box regression targets...') add_bbox_regression_targets(roidb) logger.info('done') _compute_and_log_stats(roidb) return roidb def extend_with_flipped_entries(roidb, dataset): """Flip each entry in the given roidb and return a new 
roidb that is the concatenation of the original roidb and the flipped entries. "Flipping" an entry means that that image and associated metadata (e.g., ground truth boxes and object proposals) are horizontally flipped. """ flipped_roidb = [] for entry in roidb: width = entry['width'] boxes = entry['boxes'].copy() oldx1 = boxes[:, 0].copy() oldx2 = boxes[:, 2].copy() boxes[:, 0] = width - oldx2 - 1 boxes[:, 2] = width - oldx1 - 1 assert (boxes[:, 2] >= boxes[:, 0]).all() flipped_entry = {} dont_copy = ('boxes', 'segms', 'gt_keypoints', 'flipped') for k, v in entry.items(): if k not in dont_copy: flipped_entry[k] = v flipped_entry['boxes'] = boxes flipped_entry['segms'] = segm_utils.flip_segms( entry['segms'], entry['height'], entry['width'] ) if dataset.keypoints is not None: flipped_entry['gt_keypoints'] = keypoint_utils.flip_keypoints( dataset.keypoints, dataset.keypoint_flip_map, entry['gt_keypoints'], entry['width'] ) flipped_entry['flipped'] = True flipped_roidb.append(flipped_entry) roidb.extend(flipped_roidb) def filter_for_training(roidb): """Remove roidb entries that have no usable RoIs based on config settings. """ def is_valid(entry): # Valid images have: # (1) At least one foreground RoI OR # (2) At least one background RoI overlaps = entry['max_overlaps'] # find boxes with sufficient overlap fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # image is only valid if such boxes exist valid = len(fg_inds) > 0 or len(bg_inds) > 0 if cfg.MODEL.KEYPOINTS_ON: # If we're training for keypoints, exclude images with no keypoints valid = valid and entry['has_visible_keypoints'] return valid num = len(roidb) filtered_roidb = [entry for entry in roidb if is_valid(entry)] num_after = len(filtered_roidb) logger.info('Filtered {} roidb entries: {} -> {}'. 
format(num - num_after, num, num_after)) return filtered_roidb def add_bbox_regression_targets(roidb): """Add information needed to train bounding-box regressors.""" for entry in roidb: entry['bbox_targets'] = compute_bbox_regression_targets(entry) def compute_bbox_regression_targets(entry): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs rois = entry['boxes'] overlaps = entry['max_overlaps'] labels = entry['max_classes'] gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] # Targets has format (class, tx, ty, tw, th) targets = np.zeros((rois.shape[0], 5), dtype=np.float32) if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return targets # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = box_utils.bbox_overlaps( rois[ex_inds, :].astype(dtype=np.float32, copy=False), rois[gt_inds, :].astype(dtype=np.float32, copy=False)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target gt_assignment = ex_gt_overlaps.argmax(axis=1) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] # Use class "1" for all boxes if using class_agnostic_bbox_reg targets[ex_inds, 0] = ( 1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds]) targets[ex_inds, 1:] = box_utils.bbox_transform_inv( ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS) return targets def _compute_and_log_stats(roidb): classes = roidb[0]['dataset'].classes char_len = np.max([len(c) for c in classes]) hist_bins = np.arange(len(classes) + 1) # Histogram of ground-truth objects gt_hist = np.zeros((len(classes)), dtype=int) for entry in roidb: gt_inds = np.where( (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] gt_classes = entry['gt_classes'][gt_inds] gt_hist += np.histogram(gt_classes, bins=hist_bins)[0] logger.debug('Ground-truth class histogram:') for i, 
v in enumerate(gt_hist): logger.debug( '{:d}{:s}: {:d}'.format( i, classes[i].rjust(char_len), v)) logger.debug('-' * char_len) logger.debug( '{:s}: {:d}'.format( 'total'.rjust(char_len), np.sum(gt_hist))) ================================================ FILE: detectron/datasets/task_evaluation.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Evaluation interface for supported tasks (box detection, instance segmentation, keypoint detection, ...). Results are stored in an OrderedDict with the following nested structure: : : : is any valid dataset (e.g., 'coco_2014_minival') is in ['box', 'mask', 'keypoint', 'box_proposal'] can be ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR@1000', 'ARs@1000', 'ARm@1000', 'ARl@1000', ...] 
def evaluate_all(
    dataset, all_boxes, all_segms, all_keyps, output_dir, use_matlab=False
):
    """Evaluate "all" tasks, where "all" includes box detection, instance
    segmentation, and keypoint detection.

    Box detection is always evaluated. Mask and keypoint evaluation run only
    when cfg.MODEL.MASK_ON / cfg.MODEL.KEYPOINTS_ON are set, and their results
    are merged into the per-dataset dict produced by the box evaluation.

    Returns:
        OrderedDict mapping dataset.name to an OrderedDict of per-task results.
    """
    all_results = evaluate_boxes(
        dataset, all_boxes, output_dir, use_matlab=use_matlab
    )
    logger.info('Evaluating bounding boxes is done!')
    if cfg.MODEL.MASK_ON:
        results = evaluate_masks(dataset, all_boxes, all_segms, output_dir)
        all_results[dataset.name].update(results[dataset.name])
        logger.info('Evaluating segmentations is done!')
    if cfg.MODEL.KEYPOINTS_ON:
        results = evaluate_keypoints(dataset, all_boxes, all_keyps, output_dir)
        all_results[dataset.name].update(results[dataset.name])
        logger.info('Evaluating keypoints is done!')
    return all_results


def evaluate_boxes(dataset, all_boxes, output_dir, use_matlab=False):
    """Evaluate bounding box detection.

    The evaluator is selected from the dataset name (json/COCO-style first,
    then Cityscapes, then PASCAL VOC).

    Returns:
        OrderedDict([(dataset.name, box_results)]).

    Raises:
        NotImplementedError: if no evaluator matches the dataset.
    """
    logger.info('Evaluating detections')
    # Salted file names and cleanup are disabled in competition mode.
    not_comp = not cfg.TEST.COMPETITION_MODE
    if _use_json_dataset_evaluator(dataset):
        coco_eval = json_dataset_evaluator.evaluate_boxes(
            dataset, all_boxes, output_dir, use_salt=not_comp, cleanup=not_comp
        )
        box_results = _coco_eval_to_box_results(coco_eval)
    elif _use_cityscapes_evaluator(dataset):
        # logger.warn is a deprecated alias; logger.warning is the supported
        # spelling and behaves identically.
        logger.warning(
            'Cityscapes bbox evaluated using COCO metrics/conversions'
        )
        coco_eval = json_dataset_evaluator.evaluate_boxes(
            dataset, all_boxes, output_dir, use_salt=not_comp, cleanup=not_comp
        )
        box_results = _coco_eval_to_box_results(coco_eval)
    elif _use_voc_evaluator(dataset):
        # For VOC, always use salt and always cleanup because results are
        # written to the shared VOCdevkit results directory
        voc_eval = voc_dataset_evaluator.evaluate_boxes(
            dataset, all_boxes, output_dir, use_matlab=use_matlab
        )
        box_results = _voc_eval_to_box_results(voc_eval)
    else:
        raise NotImplementedError(
            'No evaluator for dataset: {}'.format(dataset.name)
        )
    return OrderedDict([(dataset.name, box_results)])


def evaluate_masks(dataset, all_boxes, all_segms, output_dir):
    """Evaluate instance segmentation.

    Returns:
        OrderedDict([(dataset.name, mask_results)]).

    Raises:
        NotImplementedError: if no evaluator matches the dataset.
    """
    logger.info('Evaluating segmentations')
    not_comp = not cfg.TEST.COMPETITION_MODE
    if _use_json_dataset_evaluator(dataset):
        coco_eval = json_dataset_evaluator.evaluate_masks(
            dataset,
            all_boxes,
            all_segms,
            output_dir,
            use_salt=not_comp,
            cleanup=not_comp
        )
        mask_results = _coco_eval_to_mask_results(coco_eval)
    elif _use_cityscapes_evaluator(dataset):
        cs_eval = cs_json_dataset_evaluator.evaluate_masks(
            dataset,
            all_boxes,
            all_segms,
            output_dir,
            use_salt=not_comp,
            cleanup=not_comp
        )
        mask_results = _cs_eval_to_mask_results(cs_eval)
    else:
        raise NotImplementedError(
            'No evaluator for dataset: {}'.format(dataset.name)
        )
    return OrderedDict([(dataset.name, mask_results)])


def evaluate_keypoints(dataset, all_boxes, all_keyps, output_dir):
    """Evaluate human keypoint detection (i.e., 2D pose estimation).

    Only datasets whose name starts with 'keypoints_coco_' are accepted.

    Returns:
        OrderedDict([(dataset.name, keypoint_results)]).
    """
    # The original message said 'Evaluating detections' (copied from
    # evaluate_boxes), which made keypoint runs indistinguishable in the log.
    logger.info('Evaluating keypoints')
    not_comp = not cfg.TEST.COMPETITION_MODE
    assert dataset.name.startswith('keypoints_coco_'), \
        'Only COCO keypoints are currently supported'
    coco_eval = json_dataset_evaluator.evaluate_keypoints(
        dataset,
        all_boxes,
        all_keyps,
        output_dir,
        use_salt=not_comp,
        cleanup=not_comp
    )
    keypoint_results = _coco_eval_to_keypoint_results(coco_eval)
    return OrderedDict([(dataset.name, keypoint_results)])


def evaluate_box_proposals(dataset, roidb):
    """Evaluate bounding box object proposals.

    Average recall is computed for every combination of proposal limit
    (100, 1000) and area range (all, small, medium, large) and stored under
    keys of the form 'AR{suffix}@{limit}'.

    Returns:
        OrderedDict([(dataset.name, results)]).
    """
    res = _empty_box_proposal_results()
    areas = {'all': '', 'small': 's', 'medium': 'm', 'large': 'l'}
    for limit in [100, 1000]:
        for area, suffix in areas.items():
            stats = json_dataset_evaluator.evaluate_box_proposals(
                dataset,
                roidb,
                area=area,
                limit=limit,
                class_specific=cfg.TEST.CLASS_SPECIFIC_AR
            )
            key = 'AR{}@{:d}'.format(suffix, limit)
            res['box_proposal'][key] = stats['ar']
    return OrderedDict([(dataset.name, res)])
def log_box_proposal_results(results):
    """Log bounding box proposal results, one aligned line per metric."""
    for dataset in results.keys():
        keys = results[dataset]['box_proposal'].keys()
        # Pad metric names to a common width so the values line up.
        pad = max([len(k) for k in keys])
        logger.info(dataset)
        for k, v in results[dataset]['box_proposal'].items():
            logger.info('{}: {:.3f}'.format(k.ljust(pad), v))


def log_copy_paste_friendly_results(results):
    """Log results in a format that makes it easy to copy-and-paste in a
    spreadsheet. Lines are prefixed with 'copypaste: ' to make grepping easy.
    """
    for dataset in results.keys():
        logger.info('copypaste: Dataset: {}'.format(dataset))
        for task, metrics in results[dataset].items():
            logger.info('copypaste: Task: {}'.format(task))
            metric_names = metrics.keys()
            metric_vals = ['{:.4f}'.format(v) for v in metrics.values()]
            logger.info('copypaste: ' + ','.join(metric_names))
            logger.info('copypaste: ' + ','.join(metric_vals))


def check_expected_results(results, atol=0.005, rtol=0.1):
    """Check actual results against expected results stored in
    cfg.EXPECTED_RESULTS. Optionally email if the match exceeds the specified
    tolerance.

    Expected results should take the form of a list of expectations, each
    specified by four elements: [dataset, task, metric, expected value]. For
    example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387], ...].

    The expected value may also be formatted as a list [mean, std] providing
    an empirical mean and standard deviation from which a valid range is
    computed using cfg.EXPECTED_RESULTS_SIGMA_TOL. For example:
    [['coco_2014_minival', 'box_proposal', 'AR@1000', [0.387, 0.001]], ...]

    Args:
        results: nested mapping results[dataset][task][metric] -> value.
        atol: absolute tolerance used for scalar expectations.
        rtol: relative tolerance (scaled by |expected|) used for scalar
            expectations.

    Each expectation is logged as 'PASS: ...' (info) or 'FAIL: ...' (error).
    On failure an email is sent when cfg.EXPECTED_RESULTS_EMAIL is non-empty.

    Raises:
        AssertionError: if a dataset/task/metric named in
            cfg.EXPECTED_RESULTS is missing from results, or a list-valued
            expectation does not have exactly two elements.
    """
    # cfg contains a reference set of results that we want to check against
    if len(cfg.EXPECTED_RESULTS) == 0:
        return

    for dataset, task, metric, expected_val in cfg.EXPECTED_RESULTS:
        assert dataset in results, 'Dataset {} not in results'.format(dataset)
        assert task in results[dataset], 'Task {} not in results'.format(task)
        assert metric in results[dataset][task], \
            'Metric {} not in results'.format(metric)
        actual_val = results[dataset][task][metric]
        ok = False
        if isinstance(expected_val, list):
            assert len(expected_val) == 2, (
                'Expected result must be in (mean, std) format'
            )
            mean, std = expected_val
            lo = mean - cfg.EXPECTED_RESULTS_SIGMA_TOL * std
            hi = mean + cfg.EXPECTED_RESULTS_SIGMA_TOL * std
            ok = (lo < actual_val) and (actual_val < hi)
            msg = (
                '{} > {} > {} sanity check (actual vs. expected): '
                '{:.3f} vs. mean={:.4f}, std={:.4}, range=({:.4f}, {:.4f})'
            ).format(dataset, task, metric, actual_val, mean, std, lo, hi)
        else:
            err = abs(actual_val - expected_val)
            tol = atol + rtol * abs(expected_val)
            # The check passes when the error is *within* the tolerance.
            # (The comparison was previously inverted, flagging matching
            # results as failures and letting real regressions pass.)
            ok = (err < tol)
            msg = (
                '{} > {} > {} sanity check (actual vs. expected): '
                '{:.3f} vs. {:.3f}, err={:.3f}, tol={:.3f}'
            ).format(dataset, task, metric, actual_val, expected_val, err, tol)
        if not ok:
            msg = 'FAIL: ' + msg
            logger.error(msg)
            if cfg.EXPECTED_RESULTS_EMAIL != '':
                subject = 'Detectron end-to-end test failure'
                job_name = os.environ.get('DETECTRON_JOB_NAME', '')
                job_id = os.environ.get('WORKFLOW_RUN_ID', '')
                body = [
                    'Name:',
                    job_name,
                    'Run ID:',
                    job_id,
                    'Failure:',
                    msg,
                    'Config:',
                    pprint.pformat(cfg),
                    'Env:',
                    pprint.pformat(dict(os.environ)),
                ]
                send_email(
                    subject, '\n\n'.join(body), cfg.EXPECTED_RESULTS_EMAIL
                )
        else:
            msg = 'PASS: ' + msg
            logger.info(msg)


def _use_json_dataset_evaluator(dataset):
    """Check if the dataset uses the general json dataset evaluator."""
    return dataset.name.find('coco_') > -1 or cfg.TEST.FORCE_JSON_DATASET_EVAL


def _use_cityscapes_evaluator(dataset):
    """Check if the dataset uses the Cityscapes dataset evaluator."""
    return dataset.name.find('cityscapes_') > -1


def _use_voc_evaluator(dataset):
    """Check if the dataset uses the PASCAL VOC dataset evaluator."""
    return dataset.name[:4] == 'voc_'


# Indices in the stats array for COCO boxes and masks
COCO_AP = 0
COCO_AP50 = 1
COCO_AP75 = 2
COCO_APS = 3
COCO_APM = 4
COCO_APL = 5
# Slight difference for keypoints
COCO_KPS_APM = 3
COCO_KPS_APL = 4


# ---------------------------------------------------------------------------- #
# Helper functions for producing properly formatted results.
# ---------------------------------------------------------------------------- #


def _coco_eval_to_box_results(coco_eval):
    """Convert a COCO eval object into the 'box' results dict.

    When coco_eval is None the placeholder results (all -1) are returned.
    """
    res = _empty_box_results()
    if coco_eval is None:
        return res
    stats, box = coco_eval.stats, res['box']
    for metric, idx in (
        ('AP', COCO_AP),
        ('AP50', COCO_AP50),
        ('AP75', COCO_AP75),
        ('APs', COCO_APS),
        ('APm', COCO_APM),
        ('APl', COCO_APL),
    ):
        box[metric] = stats[idx]
    return res


def _coco_eval_to_mask_results(coco_eval):
    """Convert a COCO eval object into the 'mask' results dict.

    When coco_eval is None the placeholder results (all -1) are returned.
    """
    res = _empty_mask_results()
    if coco_eval is None:
        return res
    stats, mask = coco_eval.stats, res['mask']
    for metric, idx in (
        ('AP', COCO_AP),
        ('AP50', COCO_AP50),
        ('AP75', COCO_AP75),
        ('APs', COCO_APS),
        ('APm', COCO_APM),
        ('APl', COCO_APL),
    ):
        mask[metric] = stats[idx]
    return res


def _coco_eval_to_keypoint_results(coco_eval):
    """Convert a COCO eval object into the 'keypoint' results dict.

    Keypoint stats have no small-area entry, so APm/APl use the
    keypoint-specific indices. None yields the placeholder results.
    """
    res = _empty_keypoint_results()
    if coco_eval is None:
        return res
    stats, keypoint = coco_eval.stats, res['keypoint']
    for metric, idx in (
        ('AP', COCO_AP),
        ('AP50', COCO_AP50),
        ('AP75', COCO_AP75),
        ('APm', COCO_KPS_APM),
        ('APl', COCO_KPS_APL),
    ):
        keypoint[metric] = stats[idx]
    return res


def _voc_eval_to_box_results(voc_eval):
    """VOC results are not converted; placeholder box results are returned."""
    # Not supported (return empty results)
    return _empty_box_results()


def _cs_eval_to_mask_results(cs_eval):
    """Cityscapes results are not converted; placeholders are returned."""
    # Not supported (return empty results)
    return _empty_mask_results()


def _empty_box_results():
    """Return box results with every metric set to the placeholder -1."""
    names = ('AP', 'AP50', 'AP75', 'APs', 'APm', 'APl')
    return OrderedDict([('box', OrderedDict((n, -1) for n in names))])


def _empty_mask_results():
    """Return mask results with every metric set to the placeholder -1."""
    names = ('AP', 'AP50', 'AP75', 'APs', 'APm', 'APl')
    return OrderedDict([('mask', OrderedDict((n, -1) for n in names))])


def _empty_keypoint_results():
    """Return keypoint results with every metric set to the placeholder -1."""
    names = ('AP', 'AP50', 'AP75', 'APm', 'APl')
    return OrderedDict([('keypoint', OrderedDict((n, -1) for n in names))])


def _empty_box_proposal_results():
    """Return box proposal results with every metric set to -1."""
    names = (
        'AR@100', 'ARs@100', 'ARm@100', 'ARl@100',
        'AR@1000', 'ARs@1000', 'ARm@1000', 'ARl@1000',
    )
    return OrderedDict([('box_proposal', OrderedDict((n, -1) for n in names))])
def evaluate_boxes(
    json_dataset,
    all_boxes,
    output_dir,
    use_salt=True,
    cleanup=True,
    use_matlab=False
):
    """Run the PASCAL VOC box evaluation for a dataset.

    Per-class result files are written into the devkit results directory,
    evaluated with the Python code (and with MATLAB when use_matlab is set),
    and, when cleanup is set, copied into output_dir and removed from the
    devkit.

    Args:
        use_salt: append a random uuid to the result file names.

    Returns:
        None (results are only logged and written to disk).
    """
    salt = '_{}'.format(str(uuid.uuid4())) if use_salt else ''
    filenames = _write_voc_results_files(json_dataset, all_boxes, salt)
    _do_python_eval(json_dataset, salt, output_dir)
    if use_matlab:
        _do_matlab_eval(json_dataset, salt, output_dir)
    if cleanup:
        for filename in filenames:
            shutil.copy(filename, output_dir)
            os.remove(filename)
    return None


def _write_voc_results_files(json_dataset, all_boxes, salt):
    """Write one VOC-format detections file per non-background class.

    all_boxes is indexed as all_boxes[class_index][image_index]; each entry
    is either an empty list (no detections) or an array whose rows hold
    (x1, y1, x2, y2, ..., score) with the score in the last column.

    Returns:
        The list of files written.
    """
    filenames = []
    image_set_path = voc_info(json_dataset)['image_set_path']
    assert os.path.exists(image_set_path), \
        'Image set path does not exist: {}'.format(image_set_path)
    with open(image_set_path, 'r') as f:
        image_index = [x.strip() for x in f.readlines()]
    # Sanity check that order of images in json dataset matches order in the
    # image set
    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        index = os.path.splitext(os.path.split(entry['image'])[1])[0]
        assert index == image_index[i]
    for cls_ind, cls in enumerate(json_dataset.classes):
        if cls == '__background__':
            continue
        logger.info('Writing VOC results for: {}'.format(cls))
        filename = _get_voc_results_file_template(json_dataset,
                                                  salt).format(cls)
        filenames.append(filename)
        assert len(all_boxes[cls_ind]) == len(image_index)
        with open(filename, 'wt') as f:
            for im_ind, index in enumerate(image_index):
                dets = all_boxes[cls_ind][im_ind]
                # isinstance (rather than an exact type comparison) also
                # accepts list subclasses as the "no detections" marker.
                if isinstance(dets, list):
                    assert len(dets) == 0, \
                        'dets should be numpy.ndarray or empty list'
                    continue
                # the VOCdevkit expects 1-based indices
                for k in range(dets.shape[0]):
                    f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                            format(index, dets[k, -1],
                                   dets[k, 0] + 1, dets[k, 1] + 1,
                                   dets[k, 2] + 1, dets[k, 3] + 1))
    return filenames


def _get_voc_results_file_template(json_dataset, salt):
    """Return the results file path template; '{:s}' is the class name."""
    info = voc_info(json_dataset)
    year = info['year']
    image_set = info['image_set']
    devkit_path = info['devkit_path']
    # VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt
    filename = 'comp4' + salt + '_det_' + image_set + '_{:s}.txt'
    return os.path.join(devkit_path, 'results', 'VOC' + year, 'Main', filename)


def _do_python_eval(json_dataset, salt, output_dir='output'):
    """Evaluate the written result files with the Python VOC code.

    Per-class precision/recall/AP are saved to '<cls>_pr.pkl' in output_dir
    and the APs and their mean are logged.
    """
    info = voc_info(json_dataset)
    year = info['year']
    anno_path = info['anno_path']
    image_set_path = info['image_set_path']
    devkit_path = info['devkit_path']
    cachedir = os.path.join(devkit_path, 'annotations_cache')
    aps = []
    # The PASCAL VOC metric changed in 2010
    use_07_metric = int(year) < 2010
    logger.info('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
    if not os.path.isdir(output_dir):
        # makedirs also creates missing parent directories, where mkdir
        # would raise for a nested output_dir.
        os.makedirs(output_dir)
    for cls in json_dataset.classes:
        if cls == '__background__':
            continue
        filename = _get_voc_results_file_template(
            json_dataset, salt).format(cls)
        rec, prec, ap = voc_eval(
            filename, anno_path, image_set_path, cls, cachedir, ovthresh=0.5,
            use_07_metric=use_07_metric)
        aps += [ap]
        logger.info('AP for {} = {:.4f}'.format(cls, ap))
        res_file = os.path.join(output_dir, cls + '_pr.pkl')
        save_object({'rec': rec, 'prec': prec, 'ap': ap}, res_file)
    logger.info('Mean AP = {:.4f}'.format(np.mean(aps)))
    logger.info('~~~~~~~~')
    logger.info('Results:')
    for ap in aps:
        logger.info('{:.3f}'.format(ap))
    logger.info('{:.3f}'.format(np.mean(aps)))
    logger.info('~~~~~~~~')
    logger.info('')
    logger.info('----------------------------------------------------------')
    logger.info('Results computed with the **unofficial** Python eval code.')
    logger.info('Results should be very close to the official MATLAB code.')
    logger.info('Use `./tools/reval.py --matlab ...` for your paper.')
    logger.info('-- Thanks, The Management')
    logger.info('----------------------------------------------------------')


def _do_matlab_eval(json_dataset, salt, output_dir='output'):
    """Evaluate the written result files with the MATLAB VOC devkit wrapper.

    The command is built from cfg.ROOT_DIR and cfg.MATLAB and run through the
    shell; a non-zero exit status is logged as an error.
    """
    import subprocess
    logger.info('-----------------------------------------------------')
    logger.info('Computing results with the official MATLAB eval code.')
    logger.info('-----------------------------------------------------')
    info = voc_info(json_dataset)
    path = os.path.join(
        cfg.ROOT_DIR, 'detectron', 'datasets', 'VOCdevkit-matlab-wrapper')
    # NOTE(review): the paths below are interpolated into a shell string
    # unescaped; this presumes they come from trusted configuration.
    cmd = 'cd {} && '.format(path)
    cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB)
    cmd += '-r "dbstop if error; '
    cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \
       .format(info['devkit_path'], 'comp4' + salt, info['image_set'],
               output_dir)
    logger.info('Running:\n{}'.format(cmd))
    returncode = subprocess.call(cmd, shell=True)
    if returncode != 0:
        # Previously the exit status was discarded, so a failed MATLAB run
        # went unnoticed.
        logger.error(
            'MATLAB evaluation exited with status {}'.format(returncode)
        )


def voc_info(json_dataset):
    """Derive VOC year, image set and devkit paths from the dataset name.

    The name is sliced positionally: characters 4-7 are the year and
    everything from index 9 on is the image set (e.g. 'voc_2007_test').

    Returns:
        dict with keys year, image_set, devkit_path, anno_path (a '{:s}.xml'
        template) and image_set_path.
    """
    year = json_dataset.name[4:8]
    image_set = json_dataset.name[9:]
    devkit_path = get_devkit_dir(json_dataset.name)
    assert os.path.exists(devkit_path), \
        'Devkit directory {} not found'.format(devkit_path)
    anno_path = os.path.join(
        devkit_path, 'VOC' + year, 'Annotations', '{:s}.xml')
    image_set_path = os.path.join(
        devkit_path, 'VOC' + year, 'ImageSets', 'Main', image_set + '.txt')
    return dict(
        year=year,
        image_set=image_set,
        devkit_path=devkit_path,
        anno_path=anno_path,
        image_set_path=image_set_path)
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Returns:
        A list with one dict per <object> element, holding 'name', 'pose',
        'truncated', 'difficult' and 'bbox' ([xmin, ymin, xmax, ymax]).
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(bbox.find('xmin').text),
                              int(bbox.find('ymin').text),
                              int(bbox.find('xmax').text),
                              int(bbox.find('ymax').text)]
        objects.append(obj_struct)

    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall. If use_07_metric is true,
    uses the VOC 07 11-point method (default:False).

    rec and prec are numpy arrays of equal length (cumulative recall and
    precision along the ranked detections).
    """
    if use_07_metric:
        # 11 point metric
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)

    An empty detections file (a class with no detections) yields empty
    rec/prec arrays and ap == 0.
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    imageset = os.path.splitext(os.path.basename(imagesetfile))[0]
    cachefile = os.path.join(cachedir, imageset + '_annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                logger.info(
                    'Reading annotation for {:d}/{:d}'.format(
                        i + 1, len(imagenames)))
        # save
        logger.info('Saving cached annotations to {:s}'.format(cachefile))
        save_object(recs, cachefile)
    else:
        recs = load_object(cachefile)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        # difficult objects do not count as positives
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    # reshape(-1, 4) keeps BB two-dimensional when there are no detections;
    # without it an empty file produced a 1-D array and the column indexing
    # below raised IndexError.
    BB = np.array(
        [[float(z) for z in x[2:]] for x in splitlines]
    ).reshape(-1, 4)

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # a match to a difficult object is neither a TP nor an FP
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    # duplicate detection of an already matched object
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
# Lowest and highest pyramid levels in the backbone network. For FPN, we assume
# that all networks have 5 spatial reductions, each by a factor of 2. Level 1
# would correspond to the input image, hence it does not make sense to use it.
LOWEST_BACKBONE_LVL = 2   # E.g., "conv2"-like level
HIGHEST_BACKBONE_LVL = 5  # E.g., "conv5"-like level


# ---------------------------------------------------------------------------- #
# FPN with ResNet
# ---------------------------------------------------------------------------- #

def add_fpn_ResNet50_conv5_body(model):
    """Add the ResNet50 conv5 body to `model` with FPN on all levels."""
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet50_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet50_conv5,
    )


def add_fpn_ResNet50_conv5_P2only_body(model):
    """Add the ResNet50 conv5 body with FPN, exposing only the finest level."""
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet50_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet50_conv5,
        P2only=True,
    )


def add_fpn_ResNet101_conv5_body(model):
    """Add the ResNet101 conv5 body to `model` with FPN on all levels."""
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet101_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet101_conv5,
    )


def add_fpn_ResNet101_conv5_P2only_body(model):
    """Add the ResNet101 conv5 body with FPN, exposing only the finest level."""
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet101_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet101_conv5,
        P2only=True,
    )


def add_fpn_ResNet152_conv5_body(model):
    """Add the ResNet152 conv5 body to `model` with FPN on all levels."""
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet152_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet152_conv5,
    )


def add_fpn_ResNet152_conv5_P2only_body(model):
    """Add the ResNet152 conv5 body with FPN, exposing only the finest level."""
    return add_fpn_onto_conv_body(
        model,
        conv_body_func=ResNet.add_ResNet152_conv5_body,
        fpn_level_info_func=fpn_level_info_ResNet152_conv5,
        P2only=True,
    )
# ---------------------------------------------------------------------------- #
# Functions for bolting FPN onto a backbone architectures
# ---------------------------------------------------------------------------- #

def add_fpn_onto_conv_body(
    model, conv_body_func, fpn_level_info_func, P2only=False
):
    """Add the specified conv body to the model and then add FPN levels to it.

    Args:
        model: model object passed through to conv_body_func and add_fpn.
        conv_body_func: callable invoked as conv_body_func(model) to build
            the backbone.
        fpn_level_info_func: zero-argument callable whose return value is
            handed to add_fpn as the level description.
        P2only: when true, return only the last (finest) FPN blob and its
            spatial scale instead of the full lists.

    Returns:
        (blobs_fpn, dim_fpn, spatial_scales_fpn) as produced by add_fpn, or
        the single last blob / scale when P2only is set.
    """
    # Note: blobs_fpn is in reversed order: [fpn5, fpn4, fpn3, fpn2]
    # similarly for the backbone dims: [2048, 1024, 512, 256]
    # similarly for spatial_scales_fpn: [1/32, 1/16, 1/8, 1/4]

    conv_body_func(model)
    blobs_fpn, dim_fpn, spatial_scales_fpn = add_fpn(
        model, fpn_level_info_func()
    )

    if P2only:
        # use only the finest level
        return blobs_fpn[-1], dim_fpn, spatial_scales_fpn[-1]
    else:
        # use all levels
        return blobs_fpn, dim_fpn, spatial_scales_fpn


def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper.

    Args:
        model: model object on which the conv / pooling ops are created.
        fpn_level_info: object with `blobs`, `dims` and `spatial_scales`
            sequences describing the backbone stages, coarsest first.

    Returns:
        (blobs_fpn, fpn_dim, spatial_scales): the FPN output blobs ordered
        coarsest first, the FPN channel dimension (cfg.FPN.DIM) and the
        spatial scale of each returned blob.
    """
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarsest backbone stage (usually the "conv5"-like level)
    # E.g., if the backbone level info defines 4 stages: "conv5",
    # "conv4", ... "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4
    # backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        # ConvGN was given a prefix; keep the blob it actually returned so the
        # later stages consume the right name.
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #

    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #

    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        # New coarser levels go to the front so the lists stay coarsest first.
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        # The first extra level reads the coarsest backbone blob directly
        # (not an FPN output), hence the backbone channel count as dim_in.
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                # Every extra level after the first is fed through a ReLU.
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales


def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module.

    A 1x1 conv of `fpn_lateral` (dim_lateral -> dim_top channels) is summed
    with a 2x nearest-neighbor upsampling of `fpn_top`; the sum is written to
    the blob named `fpn_bottom`.
    """
    # Lateral 1x1 conv
    if cfg.FPN.USE_GN:
        # use GroupNorm
        lat = model.ConvGN(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(
                const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                else ('XavierFill', {})),
            bias_init=const_fill(0.0)
        )
    else:
        lat = model.Conv(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(
                const_fill(0.0)
                if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})
            ),
            bias_init=const_fill(0.0)
        )
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)


def get_min_max_levels():
    """The min and max FPN levels required for supporting RPN and/or RoI
    transform operations on multiple FPN levels.

    Defaults to the backbone range (LOWEST_BACKBONE_LVL, HIGHEST_BACKBONE_LVL)
    when neither cfg.FPN.MULTILEVEL_RPN nor cfg.FPN.MULTILEVEL_ROIS is set;
    when both are set the union of the RPN and RoI ranges is returned.
    """
    min_level = LOWEST_BACKBONE_LVL
    max_level = HIGHEST_BACKBONE_LVL
    if cfg.FPN.MULTILEVEL_RPN and not cfg.FPN.MULTILEVEL_ROIS:
        max_level = cfg.FPN.RPN_MAX_LEVEL
        min_level = cfg.FPN.RPN_MIN_LEVEL
    if not cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:
        max_level = cfg.FPN.ROI_MAX_LEVEL
        min_level = cfg.FPN.ROI_MIN_LEVEL
    if cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:
        max_level = max(cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.ROI_MAX_LEVEL)
        min_level = min(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.ROI_MIN_LEVEL)
    return min_level, max_level
# ---------------------------------------------------------------------------- #
# RPN with an FPN backbone
# ---------------------------------------------------------------------------- #

def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs.

    Args:
        model: model object on which the ops are created.
        blobs_in: FPN blobs ordered coarsest first; must contain exactly one
            blob per level in [cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL].
        dim_in: channel dimension of the input blobs (also used as the hidden
            conv's output dimension).
        spatial_scales: per-blob spatial scales, in the same order as
            blobs_in.

    The conv weights/biases are created for the finest level and shared by
    all other levels. Proposals are additionally generated per level when not
    training or when cfg.MODEL.FASTER_RCNN is set.
    """
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            conv_rpn_fpn = model.Conv(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # The ReLU output is written back to the same blob.
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            # One anchor size per level: the start size doubles with each
            # level above k_min, and the stride is 2**lvl.
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )


def add_fpn_rpn_losses(model):
    """Add RPN on FPN specific losses.

    For every level in [cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL] a
    classification loss and a box regression loss are added to the model.

    Returns:
        dict accumulated from blob_utils.get_loss_gradients over all levels.
    """
    loss_gradients = {}
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        slvl = str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the feature map
        # shape
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide_fpn' + slvl, 'rpn_cls_logits_fpn' + slvl],
            'rpn_labels_int32_fpn' + slvl
        )
        # Same narrowing for the three bbox regression input blobs.
        for key in ('targets', 'inside_weights', 'outside_weights'):
            model.net.SpatialNarrowAs(
                [
                    'rpn_bbox_' + key + '_wide_fpn' + slvl,
                    'rpn_bbox_pred_fpn' + slvl
                ],
                'rpn_bbox_' + key + '_fpn' + slvl
            )
        loss_rpn_cls_fpn = model.net.SigmoidCrossEntropyLoss(
            ['rpn_cls_logits_fpn' + slvl, 'rpn_labels_int32_fpn' + slvl],
            'loss_rpn_cls_fpn' + slvl,
            normalize=0,
            scale=(
                model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
                cfg.TRAIN.IMS_PER_BATCH
            )
        )
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        loss_rpn_bbox_fpn = model.net.SmoothL1Loss(
            [
                'rpn_bbox_pred_fpn' + slvl, 'rpn_bbox_targets_fpn' + slvl,
                'rpn_bbox_inside_weights_fpn' + slvl,
                'rpn_bbox_outside_weights_fpn' + slvl
            ],
            'loss_rpn_bbox_fpn' + slvl,
            beta=1. / 9.,
            scale=model.GetLossScale(),
        )
        loss_gradients.update(
            blob_utils.
            get_loss_gradients(model, [loss_rpn_cls_fpn, loss_rpn_bbox_fpn])
        )
        model.AddLosses(['loss_rpn_cls_fpn' + slvl, 'loss_rpn_bbox_fpn' + slvl])
    return loss_gradients
('targets', 'inside_weights', 'outside_weights'): model.net.SpatialNarrowAs( [ 'rpn_bbox_' + key + '_wide_fpn' + slvl, 'rpn_bbox_pred_fpn' + slvl ], 'rpn_bbox_' + key + '_fpn' + slvl ) loss_rpn_cls_fpn = model.net.SigmoidCrossEntropyLoss( ['rpn_cls_logits_fpn' + slvl, 'rpn_labels_int32_fpn' + slvl], 'loss_rpn_cls_fpn' + slvl, normalize=0, scale=( model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM / cfg.TRAIN.IMS_PER_BATCH ) ) # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is # handled by (1) setting bbox outside weights and (2) SmoothL1Loss # normalizes by IMS_PER_BATCH loss_rpn_bbox_fpn = model.net.SmoothL1Loss( [ 'rpn_bbox_pred_fpn' + slvl, 'rpn_bbox_targets_fpn' + slvl, 'rpn_bbox_inside_weights_fpn' + slvl, 'rpn_bbox_outside_weights_fpn' + slvl ], 'loss_rpn_bbox_fpn' + slvl, beta=1. / 9., scale=model.GetLossScale(), ) loss_gradients.update( blob_utils. get_loss_gradients(model, [loss_rpn_cls_fpn, loss_rpn_bbox_fpn]) ) model.AddLosses(['loss_rpn_cls_fpn' + slvl, 'loss_rpn_bbox_fpn' + slvl]) return loss_gradients # ---------------------------------------------------------------------------- # # Helper functions for working with multilevel FPN RoIs # ---------------------------------------------------------------------------- # def map_rois_to_fpn_levels(rois, k_min, k_max): """Determine which FPN level each RoI in a set of RoIs should map to based on the heuristic in the FPN paper. """ # Compute level ids s = np.sqrt(box_utils.boxes_area(rois)) s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 # Eqn.(1) in FPN paper target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) target_lvls = np.clip(target_lvls, k_min, k_max) return target_lvls def add_multilevel_roi_blobs( blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max ): """Add RoI blobs for multiple FPN levels to the blobs dict. 
blobs: a dict mapping from blob name to numpy ndarray blob_prefix: name prefix to use for the FPN blobs rois: the source rois as a 2D numpy array of shape (N, 5) where each row is an roi and the columns encode (batch_idx, x1, y1, x2, y2) target_lvls: numpy array of shape (N, ) indicating which FPN level each roi in rois should be assigned to lvl_min: the finest (highest resolution) FPN level (e.g., 2) lvl_max: the coarest (lowest resolution) FPN level (e.g., 6) """ rois_idx_order = np.empty((0, )) rois_stacked = np.zeros((0, 5), dtype=np.float32) # for assert for lvl in range(lvl_min, lvl_max + 1): idx_lvl = np.where(target_lvls == lvl)[0] blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :] rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) rois_stacked = np.vstack( [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]] ) rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False) blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore # Sanity check that restore order is correct assert (rois_stacked[rois_idx_restore] == rois).all() # ---------------------------------------------------------------------------- # # FPN level info for stages 5, 4, 3, 2 for select models (more can be added) # ---------------------------------------------------------------------------- # FpnLevelInfo = collections.namedtuple( 'FpnLevelInfo', ['blobs', 'dims', 'spatial_scales'] ) def fpn_level_info_ResNet50_conv5(): return FpnLevelInfo( blobs=('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum'), dims=(2048, 1024, 512, 256), spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.) ) def fpn_level_info_ResNet101_conv5(): return FpnLevelInfo( blobs=('res5_2_sum', 'res4_22_sum', 'res3_3_sum', 'res2_2_sum'), dims=(2048, 1024, 512, 256), spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.) 
) def fpn_level_info_ResNet152_conv5(): return FpnLevelInfo( blobs=('res5_2_sum', 'res4_35_sum', 'res3_7_sum', 'res2_2_sum'), dims=(2048, 1024, 512, 256), spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.) ) ================================================ FILE: detectron/modeling/ResNet.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Implements ResNet and ResNeXt. See: https://arxiv.org/abs/1512.03385, https://arxiv.org/abs/1611.05431. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from detectron.core.config import cfg from detectron.utils.net import get_group_gn # ---------------------------------------------------------------------------- # # Bits for specific architectures (ResNet50, ResNet101, ...) 
# ---------------------------------------------------------------------------- #


def add_ResNet50_conv4_body(model):
    """Add a ResNet body with block counts (3, 4, 6), i.e. through res4."""
    return add_ResNet_convX_body(model, (3, 4, 6))


def add_ResNet50_conv5_body(model):
    """Add a ResNet body with block counts (3, 4, 6, 3), i.e. through res5."""
    return add_ResNet_convX_body(model, (3, 4, 6, 3))


def add_ResNet101_conv4_body(model):
    """Add a ResNet body with block counts (3, 4, 23), i.e. through res4."""
    return add_ResNet_convX_body(model, (3, 4, 23))


def add_ResNet101_conv5_body(model):
    """Add a ResNet body with block counts (3, 4, 23, 3), i.e. through res5."""
    return add_ResNet_convX_body(model, (3, 4, 23, 3))


def add_ResNet152_conv5_body(model):
    """Add a ResNet body with block counts (3, 8, 36, 3), i.e. through res5."""
    return add_ResNet_convX_body(model, (3, 8, 36, 3))


# ---------------------------------------------------------------------------- #
# Generic ResNet components
# ---------------------------------------------------------------------------- #


def add_stage(
    model,
    prefix,
    blob_in,
    n,
    dim_in,
    dim_out,
    dim_inner,
    dilation,
    stride_init=2
):
    """Add a ResNet stage to the model by stacking n residual blocks.

    Returns (blob_out, dim_out). With n == 0 nothing is added and the inputs
    (blob_in, dim_in) are returned unchanged.
    """
    # e.g., prefix = res2
    for i in range(n):
        blob_in = add_residual_block(
            model,
            '{}_{}'.format(prefix, i),
            blob_in,
            dim_in,
            dim_out,
            dim_inner,
            dilation,
            stride_init,
            # Not using inplace for the last block;
            # it may be fetched externally or used by FPN
            inplace_sum=i < n - 1
        )
        # Every block after the first one consumes dim_out channels
        dim_in = dim_out
    return blob_in, dim_in


def add_ResNet_convX_body(model, block_counts):
    """Add a ResNet body from input data up through the res5 (aka conv5) stage.
    The final res5/conv5 stage may be optionally excluded (hence convX, where
    X = 4 or 5).

    block_counts: number of residual blocks in res2, res3, res4 and, when a
        fourth entry is present, res5

    Returns (blob_out, dim_out, spatial_scale).
    """
    # Anything other than 3 or 4 stages used to fall through silently (extra
    # entries were ignored and a conv4 body was built); reject it up front.
    assert len(block_counts) in (3, 4), \
        'block_counts must have 3 (conv4 body) or 4 (conv5 body) entries; ' \
        'got {}'.format(len(block_counts))
    freeze_at = cfg.TRAIN.FREEZE_AT
    assert freeze_at in [0, 2, 3, 4, 5], \
        'cfg.TRAIN.FREEZE_AT must be one of 0, 2, 3, 4, 5; ' \
        'got {}'.format(freeze_at)

    # add the stem (by default, conv1 and pool1 with bn; can support gn)
    p, dim_in = globals()[cfg.RESNETS.STEM_FUNC](model, 'data')

    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
    (n1, n2, n3) = block_counts[:3]
    s, dim_in = add_stage(model, 'res2', p, n1, dim_in, 256, dim_bottleneck, 1)
    if freeze_at == 2:
        model.StopGradient(s, s)
    s, dim_in = add_stage(
        model, 'res3', s, n2, dim_in, 512, dim_bottleneck * 2, 1
    )
    if freeze_at == 3:
        model.StopGradient(s, s)
    s, dim_in = add_stage(
        model, 'res4', s, n3, dim_in, 1024, dim_bottleneck * 4, 1
    )
    if freeze_at == 4:
        model.StopGradient(s, s)
    if len(block_counts) == 4:
        n4 = block_counts[3]
        s, dim_in = add_stage(
            model, 'res5', s, n4, dim_in, 2048, dim_bottleneck * 8,
            cfg.RESNETS.RES5_DILATION
        )
        if freeze_at == 5:
            model.StopGradient(s, s)
        return s, dim_in, 1. / 32. * cfg.RESNETS.RES5_DILATION
    else:
        return s, dim_in, 1. / 16.
def add_ResNet_roi_conv5_head(model, blob_in, dim_in, spatial_scale): """Adds an RoI feature transformation (e.g., RoI pooling) followed by a res5/conv5 head applied to each RoI.""" # TODO(rbg): This contains Fast R-CNN specific config options making it non- # reusable; make this more generic with model-specific wrappers model.RoIFeatureTransform( blob_in, 'pool5', blob_rois='rois', method=cfg.FAST_RCNN.ROI_XFORM_METHOD, resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION, sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, spatial_scale=spatial_scale ) dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP stride_init = int(cfg.FAST_RCNN.ROI_XFORM_RESOLUTION / 7) s, dim_in = add_stage( model, 'res5', 'pool5', 3, dim_in, 2048, dim_bottleneck * 8, 1, stride_init ) s = model.AveragePool(s, 'res5_pool', kernel=7) return s, 2048 def add_residual_block( model, prefix, blob_in, dim_in, dim_out, dim_inner, dilation, stride_init=2, inplace_sum=False ): """Add a residual block to the model.""" # prefix = res_, e.g., res2_3 # Max pooling is performed prior to the first stage (which is uniquely # distinguished by dim_in = 64), thus we keep stride = 1 for the first stage stride = stride_init if ( dim_in != dim_out and dim_in != 64 and dilation == 1 ) else 1 # transformation blob tr = globals()[cfg.RESNETS.TRANS_FUNC]( model, blob_in, dim_in, dim_out, stride, prefix, dim_inner, group=cfg.RESNETS.NUM_GROUPS, dilation=dilation ) # sum -> ReLU # shortcut function: by default using bn; support gn add_shortcut = globals()[cfg.RESNETS.SHORTCUT_FUNC] sc = add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride) if inplace_sum: s = model.net.Sum([tr, sc], tr) else: s = model.net.Sum([tr, sc], prefix + '_sum') return model.Relu(s, s) # ------------------------------------------------------------------------------ # various shortcuts (may expand and may consider a new helper) # ------------------------------------------------------------------------------ def 
basic_bn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride): """ For a pre-trained network that used BN. An AffineChannel op replaces BN during fine-tuning. """ if dim_in == dim_out: return blob_in c = model.Conv( blob_in, prefix + '_branch1', dim_in, dim_out, kernel=1, stride=stride, no_bias=1 ) return model.AffineChannel(c, prefix + '_branch1_bn', dim=dim_out) def basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride): if dim_in == dim_out: return blob_in # output name is prefix + '_branch1_gn' return model.ConvGN( blob_in, prefix + '_branch1', dim_in, dim_out, kernel=1, group_gn=get_group_gn(dim_out), stride=stride, pad=0, group=1, ) # ------------------------------------------------------------------------------ # various stems (may expand and may consider a new helper) # ------------------------------------------------------------------------------ def basic_bn_stem(model, data, **kwargs): """Add a basic ResNet stem. For a pre-trained network that used BN. An AffineChannel op replaces BN during fine-tuning. """ dim = 64 p = model.Conv(data, 'conv1', 3, dim, 7, pad=3, stride=2, no_bias=1) p = model.AffineChannel(p, 'res_conv1_bn', dim=dim, inplace=True) p = model.Relu(p, p) p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2) return p, dim def basic_gn_stem(model, data, **kwargs): """Add a basic ResNet stem (using GN)""" dim = 64 p = model.ConvGN( data, 'conv1', 3, dim, 7, group_gn=get_group_gn(dim), pad=3, stride=2 ) p = model.Relu(p, p) p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2) return p, dim # ------------------------------------------------------------------------------ # various transformations (may expand and may consider a new helper) # ------------------------------------------------------------------------------ def bottleneck_transformation( model, blob_in, dim_in, dim_out, stride, prefix, dim_inner, dilation=1, group=1 ): """Add a bottleneck transformation to the model.""" # In original resnet, stride=2 is on 1x1. 
# In fb.torch resnet, stride=2 is on 3x3. (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride) # conv 1x1 -> BN -> ReLU cur = model.ConvAffine( blob_in, prefix + '_branch2a', dim_in, dim_inner, kernel=1, stride=str1x1, pad=0, inplace=True ) cur = model.Relu(cur, cur) # conv 3x3 -> BN -> ReLU cur = model.ConvAffine( cur, prefix + '_branch2b', dim_inner, dim_inner, kernel=3, stride=str3x3, pad=1 * dilation, dilation=dilation, group=group, inplace=True ) cur = model.Relu(cur, cur) # conv 1x1 -> BN (no ReLU) # NB: for now this AffineChannel op cannot be in-place due to a bug in C2 # gradient computation for graphs like this cur = model.ConvAffine( cur, prefix + '_branch2c', dim_inner, dim_out, kernel=1, stride=1, pad=0, inplace=False ) return cur def bottleneck_gn_transformation( model, blob_in, dim_in, dim_out, stride, prefix, dim_inner, dilation=1, group=1 ): """Add a bottleneck transformation with GroupNorm to the model.""" # In original resnet, stride=2 is on 1x1. # In fb.torch resnet, stride=2 is on 3x3. (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride) # conv 1x1 -> GN -> ReLU cur = model.ConvGN( blob_in, prefix + '_branch2a', dim_in, dim_inner, kernel=1, group_gn=get_group_gn(dim_inner), stride=str1x1, pad=0, ) cur = model.Relu(cur, cur) # conv 3x3 -> GN -> ReLU cur = model.ConvGN( cur, prefix + '_branch2b', dim_inner, dim_inner, kernel=3, group_gn=get_group_gn(dim_inner), stride=str3x3, pad=1 * dilation, dilation=dilation, group=group, ) cur = model.Relu(cur, cur) # conv 1x1 -> GN (no ReLU) cur = model.ConvGN( cur, prefix + '_branch2c', dim_inner, dim_out, kernel=1, group_gn=get_group_gn(dim_out), stride=1, pad=0, ) return cur ================================================ FILE: detectron/modeling/VGG16.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """VGG16 from https://arxiv.org/abs/1409.1556.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from detectron.core.config import cfg def add_VGG16_conv5_body(model): model.Conv('data', 'conv1_1', 3, 64, 3, pad=1, stride=1) model.Relu('conv1_1', 'conv1_1') model.Conv('conv1_1', 'conv1_2', 64, 64, 3, pad=1, stride=1) model.Relu('conv1_2', 'conv1_2') model.MaxPool('conv1_2', 'pool1', kernel=2, pad=0, stride=2) model.Conv('pool1', 'conv2_1', 64, 128, 3, pad=1, stride=1) model.Relu('conv2_1', 'conv2_1') model.Conv('conv2_1', 'conv2_2', 128, 128, 3, pad=1, stride=1) model.Relu('conv2_2', 'conv2_2') model.MaxPool('conv2_2', 'pool2', kernel=2, pad=0, stride=2) model.StopGradient('pool2', 'pool2') model.Conv('pool2', 'conv3_1', 128, 256, 3, pad=1, stride=1) model.Relu('conv3_1', 'conv3_1') model.Conv('conv3_1', 'conv3_2', 256, 256, 3, pad=1, stride=1) model.Relu('conv3_2', 'conv3_2') model.Conv('conv3_2', 'conv3_3', 256, 256, 3, pad=1, stride=1) model.Relu('conv3_3', 'conv3_3') model.MaxPool('conv3_3', 'pool3', kernel=2, pad=0, stride=2) model.Conv('pool3', 'conv4_1', 256, 512, 3, pad=1, stride=1) model.Relu('conv4_1', 'conv4_1') model.Conv('conv4_1', 'conv4_2', 512, 512, 3, pad=1, stride=1) model.Relu('conv4_2', 'conv4_2') model.Conv('conv4_2', 'conv4_3', 512, 
512, 3, pad=1, stride=1) model.Relu('conv4_3', 'conv4_3') model.MaxPool('conv4_3', 'pool4', kernel=2, pad=0, stride=2) model.Conv('pool4', 'conv5_1', 512, 512, 3, pad=1, stride=1) model.Relu('conv5_1', 'conv5_1') model.Conv('conv5_1', 'conv5_2', 512, 512, 3, pad=1, stride=1) model.Relu('conv5_2', 'conv5_2') model.Conv('conv5_2', 'conv5_3', 512, 512, 3, pad=1, stride=1) blob_out = model.Relu('conv5_3', 'conv5_3') return blob_out, 512, 1. / 16. def add_VGG16_roi_fc_head(model, blob_in, dim_in, spatial_scale): model.RoIFeatureTransform( blob_in, 'pool5', blob_rois='rois', method=cfg.FAST_RCNN.ROI_XFORM_METHOD, resolution=7, sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, spatial_scale=spatial_scale ) model.FC('pool5', 'fc6', dim_in * 7 * 7, 4096) model.Relu('fc6', 'fc6') model.FC('fc6', 'fc7', 4096, 4096) blob_out = model.Relu('fc7', 'fc7') return blob_out, 4096 ================================================ FILE: detectron/modeling/VGG_CNN_M_1024.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## """VGG_CNN_M_1024 from https://arxiv.org/abs/1405.3531.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from detectron.core.config import cfg def add_VGG_CNN_M_1024_conv5_body(model): model.Conv('data', 'conv1', 3, 96, 7, pad=0, stride=2) model.Relu('conv1', 'conv1') model.LRN('conv1', 'norm1', size=5, alpha=0.0005, beta=0.75, bias=2.) model.MaxPool('norm1', 'pool1', kernel=3, pad=0, stride=2) model.StopGradient('pool1', 'pool1') # No updates at conv1 and below (norm1 and pool1 have no params, # so we can stop gradients before them, too) model.Conv('pool1', 'conv2', 96, 256, 5, pad=0, stride=2) model.Relu('conv2', 'conv2') model.LRN('conv2', 'norm2', size=5, alpha=0.0005, beta=0.75, bias=2.) model.MaxPool('norm2', 'pool2', kernel=3, pad=0, stride=2) model.Conv('pool2', 'conv3', 256, 512, 3, pad=1, stride=1) model.Relu('conv3', 'conv3') model.Conv('conv3', 'conv4', 512, 512, 3, pad=1, stride=1) model.Relu('conv4', 'conv4') model.Conv('conv4', 'conv5', 512, 512, 3, pad=1, stride=1) blob_out = model.Relu('conv5', 'conv5') return blob_out, 512, 1. / 16. def add_VGG_CNN_M_1024_roi_fc_head(model, blob_in, dim_in, spatial_scale): model.RoIFeatureTransform( blob_in, 'pool5', blob_rois='rois', method=cfg.FAST_RCNN.ROI_XFORM_METHOD, resolution=6, sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, spatial_scale=spatial_scale ) model.FC('pool5', 'fc6', dim_in * 6 * 6, 4096) model.Relu('fc6', 'fc6') model.FC('fc6', 'fc7', 4096, 1024) blob_out = model.Relu('fc7', 'fc7') return blob_out, 1024 ================================================ FILE: detectron/modeling/__init__.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## ================================================ FILE: detectron/modeling/detector.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## """Defines DetectionModelHelper, the class that represents a Detectron model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import numpy as np import logging from caffe2.python import cnn from caffe2.python import core from caffe2.python import workspace from caffe2.python.modeling import initializers from caffe2.python.modeling.parameter_info import ParameterTags from detectron.core.config import cfg from detectron.ops.collect_and_distribute_fpn_rpn_proposals \ import CollectAndDistributeFpnRpnProposalsOp from detectron.ops.generate_proposal_labels import GenerateProposalLabelsOp from detectron.ops.generate_proposals import GenerateProposalsOp import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data import detectron.utils.c2 as c2_utils logger = logging.getLogger(__name__) class DetectionModelHelper(cnn.CNNModelHelper): def __init__(self, **kwargs): # Handle args specific to the DetectionModelHelper, others pass through # to CNNModelHelper self.train = kwargs.get('train', False) self.num_classes = kwargs.get('num_classes', -1) assert self.num_classes > 0, 'num_classes must be > 0' for k in ('train', 'num_classes'): if k in kwargs: del kwargs[k] kwargs['order'] = 'NCHW' # Defensively set cudnn_exhaustive_search to False in case the default # changes in CNNModelHelper. The detection code uses variable size # inputs that might not play nicely with cudnn_exhaustive_search. 
kwargs['cudnn_exhaustive_search'] = False super(DetectionModelHelper, self).__init__(**kwargs) self.roi_data_loader = None self.losses = [] self.metrics = [] self.do_not_update_params = [] # Param on this list are not updated self.net.Proto().type = cfg.MODEL.EXECUTION_TYPE self.net.Proto().num_workers = cfg.NUM_GPUS * 4 self.prev_use_cudnn = self.use_cudnn self.gn_params = [] # Param on this list are GroupNorm parameters def TrainableParams(self, gpu_id=-1): """Get the blob names for all trainable parameters, possibly filtered by GPU id. """ return [ p for p in self.params if ( p in self.param_to_grad and # p has a gradient p not in self.do_not_update_params and # not on the blacklist (gpu_id == -1 or # filter for gpu assignment, if gpu_id set str(p).find('gpu_{}'.format(gpu_id)) == 0) )] def AffineChannel(self, blob_in, blob_out, dim, inplace=False): """Affine transformation to replace BN in networks where BN cannot be used (e.g., because the minibatch size is too small). The operations can be done in place to save memory. """ blob_out = blob_out or self.net.NextName() param_prefix = blob_out scale = self.create_param( param_name=param_prefix + '_s', initializer=initializers.Initializer("ConstantFill", value=1.), tags=ParameterTags.WEIGHT, shape=[dim, ], ) bias = self.create_param( param_name=param_prefix + '_b', initializer=initializers.Initializer("ConstantFill", value=0.), tags=ParameterTags.BIAS, shape=[dim, ], ) if inplace: return self.net.AffineChannel([blob_in, scale, bias], blob_in) else: return self.net.AffineChannel([blob_in, scale, bias], blob_out) def GenerateProposals(self, blobs_in, blobs_out, anchors, spatial_scale): """Op for generating RPN porposals. blobs_in: - 'rpn_cls_probs': 4D tensor of shape (N, A, H, W), where N is the number of minibatch images, A is the number of anchors per locations, and (H, W) is the spatial size of the prediction grid. Each value represents a "probability of object" rating in [0, 1]. 
- 'rpn_bbox_pred': 4D tensor of shape (N, 4 * A, H, W) of predicted deltas for transformation anchor boxes into RPN proposals. - 'im_info': 2D tensor of shape (N, 3) where the three columns encode the input image's [height, width, scale]. Height and width are for the input to the network, not the original image; scale is the scale factor used to scale the original image to the network input size. blobs_out: - 'rpn_rois': 2D tensor of shape (R, 5), for R RPN proposals where the five columns encode [batch ind, x1, y1, x2, y2]. The boxes are w.r.t. the network input, which is a *scaled* version of the original image; these proposals must be scaled by 1 / scale (where scale comes from im_info; see above) to transform it back to the original input image coordinate system. - 'rpn_roi_probs': 1D tensor of objectness probability scores (extracted from rpn_cls_probs; see above). """ cfg_key = 'TRAIN' if self.train else 'TEST' if cfg[cfg_key].GENERATE_PROPOSALS_ON_GPU: rpn_pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N rpn_post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N rpn_nms_thresh = cfg[cfg_key].RPN_NMS_THRESH rpn_min_size = float(cfg[cfg_key].RPN_MIN_SIZE) input_name = str(blobs_in[0]) lvl = int(input_name[-1]) if input_name[-1].isdigit() else None anchors_name = 'anchors{}'.format(lvl) if lvl else 'anchors' for i in range(cfg.NUM_GPUS): with c2_utils.CudaScope(i): workspace.FeedBlob( 'gpu_{}/{}'.format(i, anchors_name), anchors.astype(np.float32)) self.net.GenerateProposals( blobs_in + [anchors_name], blobs_out, spatial_scale=spatial_scale, pre_nms_topN=rpn_pre_nms_topN, post_nms_topN=rpn_post_nms_topN, nms_thresh=rpn_nms_thresh, min_size=rpn_min_size, ) else: name = 'GenerateProposalsOp:' + ','.join([str(b) for b in blobs_in]) # spatial_scale passed to the Python op is only used in # convert_pkl_to_pb self.net.Python( GenerateProposalsOp(anchors, spatial_scale, self.train).forward )(blobs_in, blobs_out, name=name, spatial_scale=spatial_scale) return blobs_out def 
GenerateProposalLabels(self, blobs_in): """Op for generating training labels for RPN proposals. This is used when training RPN jointly with Fast/Mask R-CNN (as in end-to-end Faster R-CNN training). blobs_in: - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals - 'roidb': roidb entries that will be labeled - 'im_info': See GenerateProposals doc. blobs_out: - (variable set of blobs): returns whatever blobs are required for training the model. It does this by querying the data loader for the list of blobs that are needed. """ name = 'GenerateProposalLabelsOp:' + ','.join( [str(b) for b in blobs_in] ) # The list of blobs is not known before run-time because it depends on # the specific model being trained. Query the data loader to get the # list of output blob names. blobs_out = fast_rcnn_roi_data.get_fast_rcnn_blob_names( is_training=self.train ) blobs_out = [core.ScopedBlobReference(b) for b in blobs_out] self.net.Python(GenerateProposalLabelsOp().forward)( blobs_in, blobs_out, name=name ) return blobs_out def CollectAndDistributeFpnRpnProposals(self): """Merge RPN proposals generated at multiple FPN levels and then distribute those proposals to their appropriate FPN levels. An anchor at one FPN level may predict an RoI that will map to another level, hence the need to redistribute the proposals. This function assumes standard blob names for input and output blobs. Input blobs: [rpn_rois_fpn, ..., rpn_rois_fpn, rpn_roi_probs_fpn, ..., rpn_roi_probs_fpn] - rpn_rois_fpn are the RPN proposals for FPN level i; see rpn_rois documentation from GenerateProposals. - rpn_roi_probs_fpn are the RPN objectness probabilities for FPN level i; see rpn_roi_probs documentation from GenerateProposals. If used during training, then the input blobs will also include: [roidb, im_info] (see GenerateProposalLabels). 
Output blobs: [rois_fpn, ..., rois_rpn, rois, rois_idx_restore] - rois_fpn are the RPN proposals for FPN level i - rois_idx_restore is a permutation on the concatenation of all rois_fpn, i=min...max, such that when applied the RPN RoIs are restored to their original order in the input blobs. If used during training, then the output blobs will also include: [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights]. """ k_max = cfg.FPN.RPN_MAX_LEVEL k_min = cfg.FPN.RPN_MIN_LEVEL # Prepare input blobs rois_names = ['rpn_rois_fpn' + str(l) for l in range(k_min, k_max + 1)] score_names = [ 'rpn_roi_probs_fpn' + str(l) for l in range(k_min, k_max + 1) ] blobs_in = rois_names + score_names if self.train: blobs_in += ['roidb', 'im_info'] blobs_in = [core.ScopedBlobReference(b) for b in blobs_in] name = 'CollectAndDistributeFpnRpnProposalsOp:' + ','.join( [str(b) for b in blobs_in] ) # Prepare output blobs blobs_out = fast_rcnn_roi_data.get_fast_rcnn_blob_names( is_training=self.train ) blobs_out = [core.ScopedBlobReference(b) for b in blobs_out] outputs = self.net.Python( CollectAndDistributeFpnRpnProposalsOp(self.train).forward )(blobs_in, blobs_out, name=name) return outputs def DropoutIfTraining(self, blob_in, dropout_rate): """Add dropout to blob_in if the model is in training mode and dropout_rate is > 0.""" blob_out = blob_in if self.train and dropout_rate > 0: blob_out = self.Dropout( blob_in, blob_in, ratio=dropout_rate, is_test=False ) return blob_out def RoIFeatureTransform( self, blobs_in, blob_out, blob_rois='rois', method='RoIPoolF', resolution=7, spatial_scale=1. / 16., sampling_ratio=0 ): """Add the specified RoI pooling method. The sampling_ratio argument is supported for some, but not all, RoI transform methods. 
def ConvShared(
    self,
    blob_in,
    blob_out,
    dim_in,
    dim_out,
    kernel,
    weight=None,
    bias=None,
    **kwargs
):
    """Add conv op that shares weights and/or biases with another conv op.

    Args:
        blob_in: input blob of the convolution.
        blob_out: output blob of the convolution.
        dim_in: not used by this method.
        dim_out: not used by this method.
        kernel: kernel size forwarded to the Conv op.
        weight: existing weight blob to reuse (required).
        bias: existing bias blob to reuse (required unless 'no_bias' is set).
        **kwargs: extra Conv op arguments. A truthy 'no_bias' drops the bias
            input; the 'no_bias' key itself is never forwarded to the op.

    Returns:
        The result of self.net.Conv for the newly added op.

    Raises:
        ValueError: if weight is None, or if bias is None while a bias input
            is requested.
    """
    # 'no_bias' only selects the op inputs; it must not reach the op itself
    use_bias = not kwargs.pop('no_bias', False)

    # Fail early with a clear message instead of handing None to the op
    if weight is None:
        raise ValueError('ConvShared requires an existing weight blob')
    if use_bias and bias is None:
        raise ValueError(
            'ConvShared requires an existing bias blob unless no_bias is set'
        )

    if self.use_cudnn:
        kwargs['engine'] = 'CUDNN'
        kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
        if self.ws_nbytes_limit:
            kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit

    blobs_in = [blob_in, weight, bias] if use_bias else [blob_in, weight]
    return self.net.Conv(
        blobs_in, blob_out, kernel=kernel, order=self.order, **kwargs
    )


def BilinearInterpolation(
    self, blob_in, blob_out, dim_in, dim_out, up_scale
):
    """Bilinear interpolation in space of scale.

    Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale

    Adapted from the CVPR'15 FCN code.
    See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py

    The interpolation is realised as a ConvTranspose op whose weights are a
    fixed bilinear kernel and whose bias is zero; both parameter blobs are
    appended to self.do_not_update_params.
    """
    assert dim_in == dim_out
    assert up_scale % 2 == 0, 'Scale should be even'

    def upsample_filt(size):
        # 2D bilinear kernel of shape (size, size)
        factor = (size + 1) // 2
        if size % 2 == 1:
            center = factor - 1
        else:
            center = factor - 0.5
        og = np.ogrid[:size, :size]
        return ((1 - abs(og[0] - center) / factor) *
                (1 - abs(og[1] - center) / factor))

    kernel_size = up_scale * 2
    bil_filt = upsample_filt(kernel_size)

    kernel = np.zeros(
        (dim_in, dim_out, kernel_size, kernel_size), dtype=np.float32
    )
    # Only the "diagonal" (channel i -> channel i) filters are non-zero, so
    # every channel is upsampled independently
    kernel[range(dim_out), range(dim_in), :, :] = bil_filt

    blob = self.ConvTranspose(
        blob_in,
        blob_out,
        dim_in,
        dim_out,
        kernel_size,
        stride=int(up_scale),
        pad=int(up_scale / 2),
        weight_init=('GivenTensorFill', {'values': kernel}),
        bias_init=('ConstantFill', {'value': 0.})
    )
    # Keep the interpolation kernel fixed during training
    self.do_not_update_params.append(self.weights[-1])
    self.do_not_update_params.append(self.biases[-1])
    return blob


def ConvAffine(  # args in the same order of Conv()
    self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,
    group=1, dilation=1,
    weight_init=None,
    bias_init=None,
    suffix='_bn',
    inplace=False
):
    """ConvAffine adds a Conv op followed by a AffineChannel op (which
    replaces BN during fine tuning).

    The Conv op is created without a bias (no_bias=1) and writes to the blob
    named `prefix`; the AffineChannel output blob is `prefix + suffix`.

    Returns:
        The output of the AffineChannel op.
    """
    conv_blob = self.Conv(
        blob_in,
        prefix,
        dim_in,
        dim_out,
        kernel,
        stride=stride,
        pad=pad,
        group=group,
        dilation=dilation,
        weight_init=weight_init,
        bias_init=bias_init,
        no_bias=1
    )
    blob_out = self.AffineChannel(
        conv_blob, prefix + suffix, dim=dim_out, inplace=inplace
    )
    return blob_out


def ConvGN(  # args in the same order of Conv()
    self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,
    group_gn,  # num of groups in gn
    group=1, dilation=1,
    weight_init=None,
    bias_init=None,
    suffix='_gn',
    no_conv_bias=1,
):
    """ConvGN adds a Conv op followed by a GroupNorm op,
    including learnable scale/bias (gamma/beta)

    A group_gn below 1 is reset to 1 with a warning. The two most recently
    added parameter blobs (the GroupNorm bias and scale) are recorded in
    self.gn_params.

    Returns:
        The output of the SpatialGN op, named `prefix + suffix`.
    """
    conv_blob = self.Conv(
        blob_in,
        prefix,
        dim_in,
        dim_out,
        kernel,
        stride=stride,
        pad=pad,
        group=group,
        dilation=dilation,
        weight_init=weight_init,
        bias_init=bias_init,
        no_bias=no_conv_bias)

    if group_gn < 1:
        # GroupNorm is applied to the conv output, so report its channel
        # count (dim_out) rather than the conv input width
        logger.warning(
            'Layer: {} (dim {}): '
            'group_gn < 1; reset to 1.'.format(prefix, dim_out)
        )
        group_gn = 1

    blob_out = self.SpatialGN(
        conv_blob, prefix + suffix,
        dim_out, group=group_gn,  # op's arg name is "group"
        epsilon=cfg.GROUP_NORM.EPSILON,)

    self.gn_params.append(self.params[-1])  # add gn's bias to list
    self.gn_params.append(self.params[-2])  # add gn's scale to list
    return blob_out


def DisableCudnn(self):
    """Turn cuDNN usage off, remembering the previous setting."""
    self.prev_use_cudnn = self.use_cudnn
    self.use_cudnn = False


def RestorePreviousUseCudnn(self):
    """Swap the current and the remembered cuDNN settings."""
    prev_use_cudnn = self.use_cudnn
    self.use_cudnn = self.prev_use_cudnn
    self.prev_use_cudnn = prev_use_cudnn


def UpdateWorkspaceLr(self, cur_iter, new_lr):
    """Updates the model's current learning rate and the workspace (learning
    rate and update history/momentum blobs).

    Returns:
        new_lr, unchanged.
    """
    # The workspace is the one source of truth for the lr
    # The lr is always the same on all GPUs
    cur_lr = workspace.FetchBlob('gpu_0/lr')[0]
    # There are no type conversions between the lr in Python and the lr in
    # the GPU (both are float32), so exact comparison is ok
    if cur_lr != new_lr:
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        # Only log changes that are large enough to be interesting
        if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD:
            logger.info(
                'Changing learning rate {:.6f} -> {:.6f} at iter {:d}'.
                format(cur_lr, new_lr, cur_iter))
        self._SetNewLr(cur_lr, new_lr)
    return new_lr


def _SetNewLr(self, cur_lr, new_lr):
    """Do the actual work of updating the model and workspace blobs.
    """
    # Write the new lr into the 'gpu_<i>/lr' blob of every GPU
    for i in range(cfg.NUM_GPUS):
        with c2_utils.CudaScope(i):
            workspace.FeedBlob(
                'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
    ratio = _get_lr_change_ratio(cur_lr, new_lr)
    # The cur_lr guard avoids dividing by a (near) zero learning rate below
    if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
            ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
        self._CorrectMomentum(new_lr / cur_lr)


def _CorrectMomentum(self, correction):
    """The MomentumSGDUpdate op implements the update V as

        V := mu * V + lr * grad,

    where mu is the momentum factor, lr is the learning rate, and grad is
    the stochastic gradient. Since V is not defined independently of the
    learning rate (as it should ideally be), when the learning rate is
    changed we should scale the update history V in order to make it
    compatible in scale with lr * grad.
    """
    logger.info(
        'Scaling update history by {:.6f} (new lr / old lr)'.
        format(correction))
    for i in range(cfg.NUM_GPUS):
        with c2_utils.CudaScope(i):
            for param in self.TrainableParams(gpu_id=i):
                # In-place scale of the '<param>_momentum' blob
                op = core.CreateOperator(
                    'Scale', [param + '_momentum'], [param + '_momentum'],
                    scale=correction)
                workspace.RunOperatorOnce(op)


def GetLossScale(self):
    """Allow a way to configure the loss scale dynamically.

    This may be used in a distributed data parallel setting.

    Returns:
        1.0 / cfg.NUM_GPUS.
    """
    return 1.0 / cfg.NUM_GPUS


def AddLosses(self, losses):
    """Register one loss blob, or a list of them, in self.losses.

    Names are unscoped before being stored. Duplicates are dropped and the
    order of first registration is kept, so the resulting list does not
    depend on string hashing.
    """
    if not isinstance(losses, list):
        losses = [losses]
    # Conversion to str allows losses to include BlobReferences
    losses = [c2_utils.UnscopeName(str(loss)) for loss in losses]
    merged = []
    for name in self.losses + losses:
        if name not in merged:
            merged.append(name)
    self.losses = merged


def AddMetrics(self, metrics):
    """Register one metric, or a list of them, in self.metrics.

    Duplicates are dropped and the order of first registration is kept.
    """
    if not isinstance(metrics, list):
        metrics = [metrics]
    merged = []
    for name in self.metrics + metrics:
        if name not in merged:
            merged.append(name)
    self.metrics = merged


def _get_lr_change_ratio(cur_lr, new_lr):
    """Return the larger of new_lr / cur_lr and cur_lr / new_lr.

    Each denominator is clamped from below to 1e-10 to avoid dividing by
    zero.
    """
    eps = 1e-10
    ratio = np.max(
        (new_lr / np.max((cur_lr, eps)), cur_lr / np.max((new_lr, eps)))
    )
    return ratio
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach the RoI classification and box regression output layers.

    Both layers are FC ops reading `blob_in` (with `dim` input features):
    'cls_score' has model.num_classes outputs and 'bbox_pred' has four
    outputs per regressed class. At test time a softmax ('cls_prob') over
    the class scores is added as well.
    """
    # Classification scores
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    if not model.train:
        # Test time only: during training the softmax is folded into the
        # cross entropy loss op for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression deltas; class-agnostic regression uses 2 classes
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        bbox_classes = 2
    else:
        bbox_classes = model.num_classes
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        bbox_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0)
    )


def add_fast_rcnn_losses(model):
    """Attach the classification and box regression losses.

    Adds a SoftmaxWithLoss op ('loss_cls') and a SmoothL1Loss op
    ('loss_bbox'), both scaled by model.GetLossScale(), plus an
    'accuracy_cls' metric.

    Returns:
        The result of blob_utils.get_loss_gradients for the two losses.
    """
    loss_scale_cls = model.GetLossScale()
    _, loss_cls = model.net.SoftmaxWithLoss(
        ['cls_score', 'labels_int32'],
        ['cls_prob', 'loss_cls'],
        scale=loss_scale_cls
    )

    bbox_inputs = [
        'bbox_pred',
        'bbox_targets',
        'bbox_inside_weights',
        'bbox_outside_weights',
    ]
    loss_bbox = model.net.SmoothL1Loss(
        bbox_inputs, 'loss_bbox', scale=model.GetLossScale()
    )

    gradients = blob_utils.get_loss_gradients(model, [loss_cls, loss_bbox])

    # Bookkeeping: accuracy metric and the names of the losses
    model.Accuracy(['cls_prob', 'labels_int32'], 'accuracy_cls')
    model.AddLosses(['loss_cls', 'loss_bbox'])
    model.AddMetrics('accuracy_cls')
    return gradients
def _add_box_roi_feat(model, blob_in, spatial_scale):
    """Add the box-head RoI feature transform; return (blob, resolution)."""
    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    feat = model.RoIFeatureTransform(
        blob_in,
        'roi_feat',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=resolution,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    return feat, resolution


def add_roi_2mlp_head(model, blob_in, dim_in, spatial_scale):
    """Add a ReLU MLP with two hidden layers.

    Returns:
        ('fc7', hidden_dim) - output blob name and its dimension.
    """
    hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    roi_feat, roi_size = _add_box_roi_feat(model, blob_in, spatial_scale)

    # fc6 consumes the flattened roi_size x roi_size pooled feature
    model.FC(roi_feat, 'fc6', dim_in * roi_size * roi_size, hidden_dim)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', hidden_dim, hidden_dim)
    model.Relu('fc7', 'fc7')
    return 'fc7', hidden_dim


def add_roi_Xconv1fc_head(model, blob_in, dim_in, spatial_scale):
    """Add a X conv + 1fc head, as a reference if not using GroupNorm

    Returns:
        ('fc6', fc_dim) - output blob name and its dimension.
    """
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    current, roi_size = _add_box_roi_feat(model, blob_in, spatial_scale)

    num_convs = cfg.FAST_RCNN.NUM_STACKED_CONVS
    for idx in range(1, num_convs + 1):
        current = model.Conv(
            current,
            'head_conv' + str(idx),
            dim_in,
            hidden_dim,
            3,
            stride=1,
            pad=1,
            weight_init=('MSRAFill', {}),
            bias_init=('ConstantFill', {'value': 0.}),
            no_bias=0
        )
        current = model.Relu(current, current)
        # Every conv after the first one reads hidden_dim channels
        dim_in = hidden_dim

    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim)
    model.Relu('fc6', 'fc6')
    return 'fc6', fc_dim


def add_roi_Xconv1fc_gn_head(model, blob_in, dim_in, spatial_scale):
    """Add a X conv + 1fc head, with GroupNorm

    Returns:
        ('fc6', fc_dim) - output blob name and its dimension.
    """
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    current, roi_size = _add_box_roi_feat(model, blob_in, spatial_scale)

    num_convs = cfg.FAST_RCNN.NUM_STACKED_CONVS
    for idx in range(1, num_convs + 1):
        current = model.ConvGN(
            current,
            'head_conv' + str(idx),
            dim_in,
            hidden_dim,
            3,
            group_gn=get_group_gn(hidden_dim),
            stride=1,
            pad=1,
            weight_init=('MSRAFill', {}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        # Every conv after the first one reads hidden_dim channels
        dim_in = hidden_dim

    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim)
    model.Relu('fc6', 'fc6')
    return 'fc6', fc_dim
def generate_anchors(
    stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.

    Returns:
        A float array with one row per (aspect ratio, size) pair; rows are
        grouped by aspect ratio.
    """
    # Sizes are expressed relative to the stride-sized reference window
    scales = np.array(sizes, dtype=float) / stride
    ratios = np.array(aspect_ratios, dtype=float)
    return _generate_anchors(stride, scales, ratios)


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
    """
    reference = np.array(
        [0, 0, base_size - 1, base_size - 1], dtype=float
    )
    per_ratio = _ratio_enum(reference, aspect_ratios)
    return np.vstack([_scale_enum(row, scales) for row in per_ratio])


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    x1, y1, x2, y2 = anchor[0], anchor[1], anchor[2], anchor[3]
    # Coordinates are inclusive, hence the +1
    width = x2 - x1 + 1
    height = y2 - y1 + 1
    return width, height, x1 + 0.5 * (width - 1), y1 + 0.5 * (height - 1)


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """
    # Column vectors of half extents; hstack then yields one row per anchor
    half_w = 0.5 * (ws[:, np.newaxis] - 1)
    half_h = 0.5 * (hs[:, np.newaxis] - 1)
    return np.hstack(
        (x_ctr - half_w, y_ctr - half_h, x_ctr + half_w, y_ctr + half_h)
    )


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    area = w * h
    # Keep the area (approximately) fixed while changing height / width
    widths = np.round(np.sqrt(area / ratios))
    heights = np.round(widths * ratios)
    return _mkanchors(widths, heights, x_ctr, y_ctr)


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    return _mkanchors(w * scales, h * scales, x_ctr, y_ctr)
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps.

    Depending on cfg.KRCNN, the feature blob is optionally passed through a
    ConvTranspose + ReLU, then mapped to cfg.KRCNN.NUM_KEYPOINTS heatmaps by
    a ConvTranspose or a 1x1 Conv, and optionally upsampled bilinearly by
    cfg.KRCNN.UP_SCALE.

    Returns:
        The blob holding the final heatmaps.
    """
    # NxKxHxW
    upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)

    if cfg.KRCNN.USE_DECONV:
        # Apply ConvTranspose to the feature representation; results in 2x
        # upsampling
        blob_in = model.ConvTranspose(
            blob_in,
            'kps_deconv',
            dim,
            cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM

    # The name 'kps_score' is reserved for the final output blob
    if upsample_heatmap:
        blob_name = 'kps_score_lowres'
    else:
        blob_name = 'kps_score'

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Use ConvTranspose to predict heatmaps; results in 2x upsampling
        blob_out = model.ConvTranspose(
            blob_in,
            blob_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        # Use Conv to predict heatmaps; does no upsampling
        blob_out = model.Conv(
            blob_in,
            blob_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

    if upsample_heatmap:
        # Increase heatmap output size via bilinear upsampling
        blob_out = model.BilinearInterpolation(
            blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE
        )

    return blob_out


def add_keypoint_losses(model):
    """Add Mask R-CNN keypoint specific losses.

    Returns:
        The result of blob_utils.get_loss_gradients for the keypoint loss.
    """
    # Reshape input from (N, K, H, W) to (NK, HW)
    model.net.Reshape(
        ['kps_score'], ['kps_score_reshaped', '_kps_score_old_shape'],
        shape=(-1, cfg.KRCNN.HEATMAP_SIZE * cfg.KRCNN.HEATMAP_SIZE)
    )
    # Softmax across **space** (woahh....space!)
    # Note: this is not what is commonly called "spatial softmax"
    # (i.e., softmax applied along the channel dimension at each spatial
    # location); This is softmax applied over a set of spatial locations (i.e.,
    # each spatial location is a "class").
    #
    # The per-GPU factor comes from model.GetLossScale(), as it does for the
    # box and mask losses, so a customised loss scale also applies here.
    kps_prob, loss_kps = model.net.SoftmaxWithLoss(
        ['kps_score_reshaped', 'keypoint_locations_int32', 'keypoint_weights'],
        ['kps_prob', 'loss_kps'],
        scale=cfg.KRCNN.LOSS_WEIGHT * model.GetLossScale(),
        spatial=0
    )
    if not cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
        # Discussion: the softmax loss above will average the loss by the sum of
        # keypoint_weights, i.e. the total number of visible keypoints. Since
        # the number of visible keypoints can vary significantly between
        # minibatches, this has the effect of up-weighting the importance of
        # minibatches with few visible keypoints. (Imagine the extreme case of
        # only one visible keypoint versus N: in the case of N, each one
        # contributes 1/N to the gradient compared to the single keypoint
        # determining the gradient direction). Instead, we can normalize the
        # loss by the total number of keypoints, if it were the case that all
        # keypoints were visible in a full minibatch. (Returning to the example,
        # this means that the one visible keypoint contributes as much as each
        # of the N keypoints.)
        model.StopGradient(
            'keypoint_loss_normalizer', 'keypoint_loss_normalizer'
        )
        loss_kps = model.net.Mul(
            ['loss_kps', 'keypoint_loss_normalizer'], 'loss_kps_normalized'
        )
    loss_gradients = blob_utils.get_loss_gradients(model, [loss_kps])
    model.AddLosses(loss_kps)
    return loss_gradients


def add_ResNet_roi_conv5_head_for_keypoints(
    model, blob_in, dim_in, spatial_scale
):
    """Add a ResNet "conv5" / "stage5" head for Mask R-CNN keypoint prediction.

    Returns:
        (blob, 2048) - the stage output and its dimension.
    """
    model.RoIFeatureTransform(
        blob_in,
        '_[pose]_pool5',
        blob_rois='keypoint_rois',
        method=cfg.KRCNN.ROI_XFORM_METHOD,
        resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    # Using the prefix '_[pose]_' to 'res5' enables initializing the head's
    # parameters using pretrained 'res5' parameters if given (see
    # utils.net.initialize_from_weights_file)
    s, _ = ResNet.add_stage(
        model,
        '_[pose]_res5',
        '_[pose]_pool5',
        3,
        dim_in,
        2048,
        512,
        cfg.KRCNN.DILATION,
        stride_init=int(cfg.KRCNN.ROI_XFORM_RESOLUTION / 7)
    )
    return s, 2048


def add_roi_pose_head_v1convX(model, blob_in, dim_in, spatial_scale):
    """Add a Mask R-CNN keypoint head. v1convX design: X * (conv).

    Returns:
        (blob, hidden_dim) - the last activation and its dimension.
    """
    hidden_dim = cfg.KRCNN.CONV_HEAD_DIM
    kernel_size = cfg.KRCNN.CONV_HEAD_KERNEL
    pad_size = kernel_size // 2
    current = model.RoIFeatureTransform(
        blob_in,
        '_[pose]_roi_feat',
        blob_rois='keypoint_rois',
        method=cfg.KRCNN.ROI_XFORM_METHOD,
        resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    for i in range(cfg.KRCNN.NUM_STACKED_CONVS):
        current = model.Conv(
            current,
            'conv_fcn' + str(i + 1),
            dim_in,
            hidden_dim,
            kernel_size,
            stride=1,
            pad=pad_size,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.01}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        # Every conv after the first one reads hidden_dim channels
        dim_in = hidden_dim

    return current, hidden_dim
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs.

    Logits are predicted either by an FC layer or by a 1x1 Conv that is
    optionally followed by bilinear upsampling. At test time a Sigmoid op
    turns the logits into probabilities ('mask_fcn_probs').

    Returns:
        The final output blob (logits when training, probs when testing).
    """
    class_specific = cfg.MRCNN.CLS_SPECIFIC_MASK
    num_cls = cfg.MODEL.NUM_CLASSES if class_specific else 1

    if cfg.MRCNN.USE_FC_OUTPUT:
        # Predict masks with a fully connected layer (ignore 'fcn' in the blob
        # name)
        dim_fc = int(dim * (cfg.MRCNN.RESOLUTION / cfg.MRCNN.UPSAMPLE_RATIO)**2)
        blob_out = model.FC(
            blob_in,
            'mask_fcn_logits',
            dim_fc,
            num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict mask using Conv

        # Use GaussianFill for class-agnostic mask prediction; fills based on
        # fan-in can be too large in this case and cause divergence
        if cfg.MRCNN.CLS_SPECIFIC_MASK:
            fill = cfg.MRCNN.CONV_INIT
        else:
            fill = 'GaussianFill'
        blob_out = model.Conv(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO
            )

    if model.train:
        return blob_out
    # == if test
    return model.net.Sigmoid(blob_out, 'mask_fcn_probs')


def add_mask_rcnn_losses(model, blob_mask):
    """Add Mask R-CNN specific losses.

    Returns:
        The result of blob_utils.get_loss_gradients for 'loss_mask'.
    """
    mask_loss_scale = model.GetLossScale() * cfg.MRCNN.WEIGHT_LOSS_MASK
    loss_mask = model.net.SigmoidCrossEntropyLoss(
        [blob_mask, 'masks_int32'], 'loss_mask', scale=mask_loss_scale
    )
    gradients = blob_utils.get_loss_gradients(model, [loss_mask])
    model.AddLosses('loss_mask')
    return gradients


def mask_rcnn_fcn_head_v1up4convs(model, blob_in, dim_in, spatial_scale):
    """v1up design: 4 * (conv 3x3), convT 2x2."""
    return mask_rcnn_fcn_head_v1upXconvs(
        model, blob_in, dim_in, spatial_scale, num_convs=4
    )


def mask_rcnn_fcn_head_v1up4convs_gn(model, blob_in, dim_in, spatial_scale):
    """v1up design: 4 * (conv 3x3), convT 2x2, with GroupNorm"""
    return mask_rcnn_fcn_head_v1upXconvs_gn(
        model, blob_in, dim_in, spatial_scale, num_convs=4
    )


def mask_rcnn_fcn_head_v1up(model, blob_in, dim_in, spatial_scale):
    """v1up design: 2 * (conv 3x3), convT 2x2."""
    return mask_rcnn_fcn_head_v1upXconvs(
        model, blob_in, dim_in, spatial_scale, num_convs=2
    )


def mask_rcnn_fcn_head_v1upXconvs(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2.

    Returns:
        (blob, dim_inner) - the ReLU'd 'conv5_mask' blob and its dimension.
    """
    features = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    for idx in range(1, num_convs + 1):
        features = model.Conv(
            features,
            '_[mask]_fcn' + str(idx),
            dim_in,
            dim_inner,
            kernel=3,
            dilation=dilation,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        features = model.Relu(features, features)
        # Every conv after the first one reads dim_inner channels
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        features,
        'conv5_mask',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    return model.Relu('conv5_mask', 'conv5_mask'), dim_inner
def mask_rcnn_fcn_head_v1upXconvs_gn(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm

    Returns:
        (blob, dim_inner) - the ReLU'd 'conv5_mask' blob and its dimension.
    """
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    for i in range(num_convs):
        current = model.ConvGN(
            current,
            '_mask_fcn' + str(i + 1),
            dim_in,
            dim_inner,
            group_gn=get_group_gn(dim_inner),
            kernel=3,
            # The padding below is sized for a dilated 3x3 kernel, so the
            # conv must actually be dilated (as in the non-GN head);
            # otherwise the feature map grows whenever dilation > 1
            dilation=dilation,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        # Every conv after the first one reads dim_inner channels
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        current,
        'conv5_mask',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner


def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
    """Use a ResNet "conv5" / "stage5" head for mask prediction. Weights and
    computation are shared with the conv5 box head. Computation can only be
    shared during training, since inference is cascaded.

    v0upshare design: conv5, convT 2x2.

    Returns:
        (blob, dim_reduced) - the 'conv5_mask' blob and its dimension.
    """
    # Since box and mask head are shared, these must match
    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    if model.train:  # share computation with bbox head at training time
        dim_conv5 = 2048
        blob_conv5 = model.net.SampleAs(
            ['res5_2_sum', 'roi_has_mask_int32'],
            ['_[mask]_res5_2_sum_sliced']
        )
    else:  # re-compute at test time
        blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
            model,
            blob_in,
            dim_in,
            spatial_scale
        )

    dim_reduced = cfg.MRCNN.DIM_REDUCED

    blob_mask = model.ConvTranspose(
        blob_conv5,
        'conv5_mask',
        dim_conv5,
        dim_reduced,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),  # std only for gauss
        bias_init=const_fill(0.0)
    )
    # The ReLU writes back into 'conv5_mask', the blob named by blob_mask
    model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_reduced


def mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale):
    """v0up design: conv5, deconv 2x2 (no weight sharing with the box head).

    Returns:
        (blob, dim_reduced) - the ReLU'd 'conv5_mask' blob and its dimension.
    """
    blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
        model,
        blob_in,
        dim_in,
        spatial_scale
    )

    dim_reduced = cfg.MRCNN.DIM_REDUCED

    model.ConvTranspose(
        blob_conv5,
        'conv5_mask',
        dim_conv5,
        dim_reduced,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=('GaussianFill', {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_reduced


def add_ResNet_roi_conv5_head_for_masks(model, blob_in, dim_in, spatial_scale):
    """Add a ResNet "conv5" / "stage5" head for predicting masks.

    Returns:
        (blob, 2048) - the stage output and its dimension.
    """
    model.RoIFeatureTransform(
        blob_in,
        blob_out='_[mask]_pool5',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    stride_init = int(cfg.MRCNN.ROI_XFORM_RESOLUTION / 7)  # by default: 2

    s, _ = ResNet.add_stage(
        model,
        '_[mask]_res5',
        '_[mask]_pool5',
        3,
        dim_in,
        2048,
        512,
        dilation,
        stride_init=stride_init
    )

    return s, 2048
detectron/modeling/model_builder.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Detectron model construction functions. Detectron supports a large number of model types. The configuration space is large. To get a sense, a given model is an element in the cartesian product of: - backbone (e.g., VGG16, ResNet, ResNeXt) - FPN (on or off) - RPN only (just proposals) - Fixed proposals for Fast R-CNN, RFCN, Mask R-CNN (with or without keypoints) - End-to-end model with RPN + Fast R-CNN (i.e., Faster R-CNN), Mask R-CNN, ... - Different "head" choices for the model - ... many configuration options ... A given model is made by combining many basic components. The result is flexible though somewhat complex to understand at first.
logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------- #
# Generic recomposable model builders
#
# For example, you can create a Fast R-CNN model with the ResNet-50-C4 backbone
# with the configuration:
#
# MODEL:
#   TYPE: generalized_rcnn
#   CONV_BODY: ResNet.add_ResNet50_conv4_body
#   ROI_HEAD: ResNet.add_ResNet_roi_conv5_head
# ---------------------------------------------------------------------------- #

def generalized_rcnn(model):
    """This model type handles:
      - Fast R-CNN
      - RPN only (not integrated with Fast R-CNN)
      - Faster R-CNN (stagewise training from NIPS paper)
      - Faster R-CNN (end-to-end joint training)
      - Mask R-CNN (stagewise training from NIPS paper)
      - Mask R-CNN (end-to-end joint training)

    The conv body and the box / mask / keypoint head builders are looked up
    by name from cfg via get_func.
    """
    return build_generic_detection_model(
        model,
        get_func(cfg.MODEL.CONV_BODY),
        add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD),
        add_roi_mask_head_func=get_func(cfg.MRCNN.ROI_MASK_HEAD),
        add_roi_keypoint_head_func=get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD),
        freeze_conv_body=cfg.TRAIN.FREEZE_CONV_BODY
    )


def rfcn(model):
    """Build an RFCN model on the conv body named by cfg.MODEL.CONV_BODY."""
    # TODO(rbg): fold into build_generic_detection_model
    return build_generic_rfcn_model(model, get_func(cfg.MODEL.CONV_BODY))


def retinanet(model):
    """Build a RetinaNet model on the conv body named by cfg.MODEL.CONV_BODY."""
    # TODO(rbg): fold into build_generic_detection_model
    return build_generic_retinanet_model(model, get_func(cfg.MODEL.CONV_BODY))


# ---------------------------------------------------------------------------- #
# Helper functions for building various re-usable network bits
# ---------------------------------------------------------------------------- #

def create(model_type_func, train=False, gpu_id=0):
    """Generic model creation function that dispatches to specific model
    building functions.

    By default, this function will generate a data parallel model configured to
    run on cfg.NUM_GPUS devices. However, you can restrict it to build a model
    targeted to a specific GPU by specifying gpu_id. This is used by
    optimizer.build_data_parallel_model() during test time.

    Args:
        model_type_func: name of the model building function; resolved with
            get_func and also used as the model's name.
        train: forwarded to the model helper as both `train` and
            `init_params`.
        gpu_id: stored on the model as `target_gpu_id`.

    Returns:
        Whatever the resolved model building function returns.
    """
    model = DetectionModelHelper(
        name=model_type_func,
        train=train,
        num_classes=cfg.MODEL.NUM_CLASSES,
        init_params=train
    )
    model.only_build_forward_pass = False
    model.target_gpu_id = gpu_id
    return get_func(model_type_func)(model)


def get_func(func_name):
    """Helper to return a function object by name. func_name must identify a
    function in this module or the path to a function relative to the base
    'modeling' module.

    Returns:
        The function object, or None when func_name is the empty string.

    Raises:
        Exception: any lookup or import failure is logged and re-raised
            unchanged.
    """
    if func_name == '':
        return None
    # Translate names from older configs to their current equivalents
    new_func_name = name_compat.get_new_name(func_name)
    if new_func_name != func_name:
        # Logger.warn is a deprecated alias; use Logger.warning
        logger.warning(
            'Remapping old function name: {} -> {}'.
            format(func_name, new_func_name)
        )
        func_name = new_func_name
    try:
        parts = func_name.split('.')
        # Refers to a function in this module
        if len(parts) == 1:
            return globals()[parts[0]]
        # Otherwise, assume we're referencing a module under modeling
        module_name = 'detectron.modeling.' + '.'.join(parts[:-1])
        module = importlib.import_module(module_name)
        return getattr(module, parts[-1])
    except Exception:
        logger.error('Failed to find function: {}'.format(func_name))
        raise
+ '.'.join(parts[:-1]) module = importlib.import_module(module_name) return getattr(module, parts[-1]) except Exception: logger.error('Failed to find function: {}'.format(func_name)) raise def build_generic_detection_model( model, add_conv_body_func, add_roi_box_head_func=None, add_roi_mask_head_func=None, add_roi_keypoint_head_func=None, freeze_conv_body=False ): def _single_gpu_build_func(model): """Build the model on a single GPU. Can be called in a loop over GPUs with name and device scoping to create a data parallel model. """ # Add the conv body (called "backbone architecture" in papers) # E.g., ResNet-50, ResNet-50-FPN, ResNeXt-101-FPN, etc. blob_conv, dim_conv, spatial_scale_conv = add_conv_body_func(model) if freeze_conv_body: for b in c2_utils.BlobReferenceList(blob_conv): model.StopGradient(b, b) if not model.train: # == inference # Create a net that can be used to execute the conv body on an image # (without also executing RPN or any other network heads) model.conv_body_net = model.net.Clone('conv_body_net') head_loss_gradients = { 'rpn': None, 'box': None, 'mask': None, 'keypoints': None, } if cfg.RPN.RPN_ON: # Add the RPN head head_loss_gradients['rpn'] = rpn_heads.add_generic_rpn_outputs( model, blob_conv, dim_conv, spatial_scale_conv ) if cfg.FPN.FPN_ON: # After adding the RPN head, restrict FPN blobs and scales to # those used in the RoI heads blob_conv, spatial_scale_conv = _narrow_to_fpn_roi_levels( blob_conv, spatial_scale_conv ) if not cfg.MODEL.RPN_ONLY: # Add the Fast R-CNN head head_loss_gradients['box'] = _add_fast_rcnn_head( model, add_roi_box_head_func, blob_conv, dim_conv, spatial_scale_conv ) if cfg.MODEL.MASK_ON: # Add the mask head head_loss_gradients['mask'] = _add_roi_mask_head( model, add_roi_mask_head_func, blob_conv, dim_conv, spatial_scale_conv ) if cfg.MODEL.KEYPOINTS_ON: # Add the keypoint head head_loss_gradients['keypoint'] = _add_roi_keypoint_head( model, add_roi_keypoint_head_func, blob_conv, dim_conv, spatial_scale_conv ) 
if model.train: loss_gradients = {} for lg in head_loss_gradients.values(): if lg is not None: loss_gradients.update(lg) return loss_gradients else: return None optim.build_data_parallel_model(model, _single_gpu_build_func) return model def _narrow_to_fpn_roi_levels(blobs, spatial_scales): """Return only the blobs and spatial scales that will be used for RoI heads. Inputs `blobs` and `spatial_scales` may include extra blobs and scales that are used for RPN proposals, but not for RoI heads. """ # Code only supports case when RPN and ROI min levels are the same assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL # RPN max level can be >= to ROI max level assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL # FPN RPN max level might be > FPN ROI max level in which case we # need to discard some leading conv blobs (blobs are ordered from # max/coarsest level to min/finest level) num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1 return blobs[-num_roi_levels:], spatial_scales[-num_roi_levels:] def _add_fast_rcnn_head( model, add_roi_box_head_func, blob_in, dim_in, spatial_scale_in ): """Add a Fast R-CNN head to the model.""" blob_frcn, dim_frcn = add_roi_box_head_func( model, blob_in, dim_in, spatial_scale_in ) fast_rcnn_heads.add_fast_rcnn_outputs(model, blob_frcn, dim_frcn) if model.train: loss_gradients = fast_rcnn_heads.add_fast_rcnn_losses(model) else: loss_gradients = None return loss_gradients def _add_roi_mask_head( model, add_roi_mask_head_func, blob_in, dim_in, spatial_scale_in ): """Add a mask prediction head to the model.""" # Capture model graph before adding the mask head bbox_net = copy.deepcopy(model.net.Proto()) # Add the mask head blob_mask_head, dim_mask_head = add_roi_mask_head_func( model, blob_in, dim_in, spatial_scale_in ) # Add the mask output blob_mask = mask_rcnn_heads.add_mask_rcnn_outputs( model, blob_mask_head, dim_mask_head ) if not model.train: # == inference # Inference uses a cascade of box predictions, then mask 
predictions. # This requires separate nets for box and mask prediction. # So we extract the mask prediction net, store it as its own network, # then restore model.net to be the bbox-only network model.mask_net, blob_mask = c2_utils.SuffixNet( 'mask_net', model.net, len(bbox_net.op), blob_mask ) model.net._net = bbox_net loss_gradients = None else: loss_gradients = mask_rcnn_heads.add_mask_rcnn_losses(model, blob_mask) return loss_gradients def _add_roi_keypoint_head( model, add_roi_keypoint_head_func, blob_in, dim_in, spatial_scale_in ): """Add a keypoint prediction head to the model.""" # Capture model graph before adding the mask head bbox_net = copy.deepcopy(model.net.Proto()) # Add the keypoint head blob_keypoint_head, dim_keypoint_head = add_roi_keypoint_head_func( model, blob_in, dim_in, spatial_scale_in ) # Add the keypoint output blob_keypoint = keypoint_rcnn_heads.add_keypoint_outputs( model, blob_keypoint_head, dim_keypoint_head ) if not model.train: # == inference # Inference uses a cascade of box predictions, then keypoint predictions # This requires separate nets for box and keypoint prediction. # So we extract the keypoint prediction net, store it as its own # network, then restore model.net to be the bbox-only network model.keypoint_net, keypoint_blob_out = c2_utils.SuffixNet( 'keypoint_net', model.net, len(bbox_net.op), blob_keypoint ) model.net._net = bbox_net loss_gradients = None else: loss_gradients = keypoint_rcnn_heads.add_keypoint_losses(model) return loss_gradients def build_generic_rfcn_model(model, add_conv_body_func, dim_reduce=None): # TODO(rbg): fold this function into build_generic_detection_model def _single_gpu_build_func(model): """Builds the model on a single GPU. 
Can be called in a loop over GPUs with name and device scoping to create a data parallel model.""" blob, dim, spatial_scale = add_conv_body_func(model) if not model.train: model.conv_body_net = model.net.Clone('conv_body_net') rfcn_heads.add_rfcn_outputs(model, blob, dim, dim_reduce, spatial_scale) if model.train: loss_gradients = fast_rcnn_heads.add_fast_rcnn_losses(model) return loss_gradients if model.train else None optim.build_data_parallel_model(model, _single_gpu_build_func) return model def build_generic_retinanet_model( model, add_conv_body_func, freeze_conv_body=False ): # TODO(rbg): fold this function into build_generic_detection_model def _single_gpu_build_func(model): """Builds the model on a single GPU. Can be called in a loop over GPUs with name and device scoping to create a data parallel model.""" blobs, dim, spatial_scales = add_conv_body_func(model) if not model.train: model.conv_body_net = model.net.Clone('conv_body_net') retinanet_heads.add_fpn_retinanet_outputs( model, blobs, dim, spatial_scales ) if model.train: loss_gradients = retinanet_heads.add_fpn_retinanet_losses( model ) return loss_gradients if model.train else None optim.build_data_parallel_model(model, _single_gpu_build_func) return model # ---------------------------------------------------------------------------- # # Network inputs # ---------------------------------------------------------------------------- # def add_training_inputs(model, roidb=None): """Create network input ops and blobs used for training. To be called *after* model_builder.create(). """ # Implementation notes: # Typically, one would create the input ops and then the rest of the net. # However, creating the input ops depends on loading the dataset, which # can take a few minutes for COCO. # We prefer to avoid waiting so debugging can fail fast. # Thus, we create the net *without input ops* prior to loading the # dataset, and then add the input ops after loading the dataset. 
# Since we defer input op creation, we need to do a little bit of surgery # to place the input ops at the start of the network op list. assert model.train, 'Training inputs can only be added to a trainable model' if roidb is not None: # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1 model.roi_data_loader = RoIDataLoader( roidb, num_loaders=cfg.DATA_LOADER.NUM_THREADS, minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE, blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY ) orig_num_op = len(model.net._net.op) blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True) for gpu_id in range(cfg.NUM_GPUS): with c2_utils.NamedCudaScope(gpu_id): for blob_name in blob_names: workspace.CreateBlob(core.ScopedName(blob_name)) model.net.DequeueBlobs( model.roi_data_loader._blobs_queue_name, blob_names ) # A little op surgery to move input ops to the start of the net diff = len(model.net._net.op) - orig_num_op new_op = model.net._net.op[-diff:] + model.net._net.op[:-diff] del model.net._net.op[:] model.net._net.op.extend(new_op) def add_inference_inputs(model): """Create network input blobs used for inference.""" def create_input_blobs_for_net(net_def): for op in net_def.op: for blob_in in op.input: if not workspace.HasBlob(blob_in): workspace.CreateBlob(blob_in) create_input_blobs_for_net(model.net.Proto()) if cfg.MODEL.MASK_ON: create_input_blobs_for_net(model.mask_net.Proto()) if cfg.MODEL.KEYPOINTS_ON: create_input_blobs_for_net(model.keypoint_net.Proto()) # ---------------------------------------------------------------------------- # # ********************** DEPRECATED FUNCTIONALITY BELOW ********************** # # ---------------------------------------------------------------------------- # # ---------------------------------------------------------------------------- # # Hardcoded functions to create various types of common models # # *** This type of model definition is deprecated *** # *** Use the generic composable 
versions instead *** # # ---------------------------------------------------------------------------- # import detectron.modeling.ResNet as ResNet import detectron.modeling.VGG16 as VGG16 import detectron.modeling.VGG_CNN_M_1024 as VGG_CNN_M_1024 def fast_rcnn(model): logger.warn('Deprecated: use `MODEL.TYPE: generalized_rcnn`.') return generalized_rcnn(model) def mask_rcnn(model): logger.warn( 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' '`MODEL.MASK_ON: True`' ) return generalized_rcnn(model) def keypoint_rcnn(model): logger.warn( 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' '`MODEL.KEYPOINTS_ON: True`' ) return generalized_rcnn(model) def mask_and_keypoint_rcnn(model): logger.warn( 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' '`MODEL.MASK_ON: True and ``MODEL.KEYPOINTS_ON: True`' ) return generalized_rcnn(model) def rpn(model): logger.warn( 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' '`MODEL.RPN_ONLY: True`' ) return generalized_rcnn(model) def fpn_rpn(model): logger.warn( 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' '`MODEL.RPN_ONLY: True` and FPN enabled via configs' ) return generalized_rcnn(model) def faster_rcnn(model): logger.warn( 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' '`MODEL.FASTER_RCNN: True`' ) return generalized_rcnn(model) def fast_rcnn_frozen_features(model): logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') return build_generic_detection_model( model, get_func(cfg.MODEL.CONV_BODY), add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD), freeze_conv_body=True ) def rpn_frozen_features(model): logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') return build_generic_detection_model( model, get_func(cfg.MODEL.CONV_BODY), freeze_conv_body=True ) def fpn_rpn_frozen_features(model): logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') return build_generic_detection_model( model, get_func(cfg.MODEL.CONV_BODY), freeze_conv_body=True 
) def mask_rcnn_frozen_features(model): logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') return build_generic_detection_model( model, get_func(cfg.MODEL.CONV_BODY), add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD), add_roi_mask_head_func=get_func(cfg.MRCNN.ROI_MASK_HEAD), freeze_conv_body=True ) def keypoint_rcnn_frozen_features(model): logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') return build_generic_detection_model( model, get_func(cfg.MODEL.CONV_BODY), add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD), add_roi_keypoint_head_func=get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD), freeze_conv_body=True ) # ---------------------------------------------------------------------------- # # Fast R-CNN models # ---------------------------------------------------------------------------- # def VGG_CNN_M_1024_fast_rcnn(model): return build_generic_detection_model( model, VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body, VGG_CNN_M_1024.add_VGG_CNN_M_1024_roi_fc_head ) def VGG16_fast_rcnn(model): return build_generic_detection_model( model, VGG16.add_VGG16_conv5_body, VGG16.add_VGG16_roi_fc_head ) def ResNet50_fast_rcnn(model): return build_generic_detection_model( model, ResNet.add_ResNet50_conv4_body, ResNet.add_ResNet_roi_conv5_head ) def ResNet101_fast_rcnn(model): return build_generic_detection_model( model, ResNet.add_ResNet101_conv4_body, ResNet.add_ResNet_roi_conv5_head ) def ResNet50_fast_rcnn_frozen_features(model): return build_generic_detection_model( model, ResNet.add_ResNet50_conv4_body, ResNet.add_ResNet_roi_conv5_head, freeze_conv_body=True ) def ResNet101_fast_rcnn_frozen_features(model): return build_generic_detection_model( model, ResNet.add_ResNet101_conv4_body, ResNet.add_ResNet_roi_conv5_head, freeze_conv_body=True ) # ---------------------------------------------------------------------------- # # RPN-only models # ---------------------------------------------------------------------------- # def 
VGG_CNN_M_1024_rpn(model): return build_generic_detection_model( model, VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body ) def VGG16_rpn(model): return build_generic_detection_model(model, VGG16.add_VGG16_conv5_body) def ResNet50_rpn_conv4(model): return build_generic_detection_model(model, ResNet.add_ResNet50_conv4_body) def ResNet101_rpn_conv4(model): return build_generic_detection_model(model, ResNet.add_ResNet101_conv4_body) def VGG_CNN_M_1024_rpn_frozen_features(model): return build_generic_detection_model( model, VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body, freeze_conv_body=True ) def VGG16_rpn_frozen_features(model): return build_generic_detection_model( model, VGG16.add_VGG16_conv5_body, freeze_conv_body=True ) def ResNet50_rpn_conv4_frozen_features(model): return build_generic_detection_model( model, ResNet.add_ResNet50_conv4_body, freeze_conv_body=True ) def ResNet101_rpn_conv4_frozen_features(model): return build_generic_detection_model( model, ResNet.add_ResNet101_conv4_body, freeze_conv_body=True ) # ---------------------------------------------------------------------------- # # Faster R-CNN models # ---------------------------------------------------------------------------- # def VGG16_faster_rcnn(model): assert cfg.MODEL.FASTER_RCNN return build_generic_detection_model( model, VGG16.add_VGG16_conv5_body, VGG16.add_VGG16_roi_fc_head ) def ResNet50_faster_rcnn(model): assert cfg.MODEL.FASTER_RCNN return build_generic_detection_model( model, ResNet.add_ResNet50_conv4_body, ResNet.add_ResNet_roi_conv5_head ) def ResNet101_faster_rcnn(model): assert cfg.MODEL.FASTER_RCNN return build_generic_detection_model( model, ResNet.add_ResNet101_conv4_body, ResNet.add_ResNet_roi_conv5_head ) # ---------------------------------------------------------------------------- # # R-FCN models # ---------------------------------------------------------------------------- # def ResNet50_rfcn(model): return build_generic_rfcn_model( model, ResNet.add_ResNet50_conv5_body, 
dim_reduce=1024 ) def ResNet101_rfcn(model): return build_generic_rfcn_model( model, ResNet.add_ResNet101_conv5_body, dim_reduce=1024 ) ================================================ FILE: detectron/modeling/name_compat.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Handle mapping from old network building function names to new names. Flexible network configuration is achieved by specifying the function name that builds a network module (e.g., the name of the conv backbone or the mask roi head). However we may wish to change names over time without breaking previous config files. This module provides backwards naming compatibility by providing a mapping from the old name to the new name. When renaming functions, it's generally a good idea to codemod existing yaml config files. An easy way to batch edit, by example, is a shell command like $ find . 
-name "*.yaml" -exec sed -i -e \ 's/head_builder\.add_roi_2mlp_head/fast_rcnn_heads.add_roi_2mlp_head/g' {} \; to perform the renaming: head_builder.add_roi_2mlp_head => fast_rcnn_heads.add_roi_2mlp_head """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals _RENAME = { # Removed "ResNet_" from the name because it wasn't relevent 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs': 'mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs', # Removed "ResNet_" from the name because it wasn't relevent 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up': 'mask_rcnn_heads.mask_rcnn_fcn_head_v1up', # Removed "ResNet_" from the name because it wasn't relevent 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0upshare': 'mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare', # Removed "ResNet_" from the name because it wasn't relevent 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0up': 'mask_rcnn_heads.mask_rcnn_fcn_head_v0up', # Removed head_builder module in favor of the more specific fast_rcnn name 'head_builder.add_roi_2mlp_head': 'fast_rcnn_heads.add_roi_2mlp_head', } def get_new_name(func_name): if func_name in _RENAME: func_name = _RENAME[func_name] return func_name ================================================ FILE: detectron/modeling/optimizer.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
##############################################################################

"""Optimization operator graph construction."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging

from caffe2.python import muji

from detectron.core.config import cfg
import detectron.utils.c2 as c2_utils

logger = logging.getLogger(__name__)


def build_data_parallel_model(model, single_gpu_build_func):
    """Build a data parallel model given a function that builds the model on a
    single GPU.

    Three cases are handled:
      - model.only_build_forward_pass: the build function is called once, with
        no device scoping added here
      - model.train: forward graph on every GPU, gradient operators, allreduce
        (only when cfg.NUM_GPUS > 1) and per-GPU parameter update ops
      - otherwise (test time): the build function is called once under the
        scope of model.target_gpu_id
    """
    if model.only_build_forward_pass:
        single_gpu_build_func(model)
    elif model.train:
        all_loss_gradients = _build_forward_graph(model, single_gpu_build_func)
        # Add backward pass on all GPUs
        model.AddGradientOperators(all_loss_gradients)
        if cfg.NUM_GPUS > 1:
            _add_allreduce_graph(model)
        for gpu_id in range(cfg.NUM_GPUS):
            # After allreduce, all GPUs perform SGD updates on their identical
            # params and gradients in parallel
            with c2_utils.NamedCudaScope(gpu_id):
                add_single_gpu_param_update_ops(model, gpu_id)
    else:
        # Test-time network operates on single GPU
        # Test-time parallelism is implemented through multiprocessing
        with c2_utils.NamedCudaScope(model.target_gpu_id):
            single_gpu_build_func(model)


def _build_forward_graph(model, single_gpu_build_func):
    """Construct the forward graph on each GPU.

    Returns a single dict merging the loss gradients returned by the build
    function for every GPU.
    """
    all_loss_gradients = {}  # Will include loss gradients from all GPUs
    # Build the model on each GPU with correct name and device scoping
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            all_loss_gradients.update(single_gpu_build_func(model))
    return all_loss_gradients


def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, get the number
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            # (stride of params_per_gpu picks the i-th param of each replica;
            # presumably TrainableParams() lists params GPU by GPU -- confirm)
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='')


def add_single_gpu_param_update_ops(model, gpu_id):
    """Add the momentum SGD update ops for the trainable params of one GPU.

    Creates the constant blobs 'lr', 'one', 'wd' and 'wd_gn' in the param init
    net, then for each param returned by model.TrainableParams(gpu_id=gpu_id)
    adds a momentum blob, the optional gradient adjustment and a
    MomentumSGDUpdate op.
    """
    # Learning rate of 0 is a dummy value to be set properly at the
    # start of training
    lr = model.param_init_net.ConstantFill(
        [], 'lr', shape=[1], value=0.0
    )
    one = model.param_init_net.ConstantFill(
        [], 'one', shape=[1], value=1.0
    )
    wd = model.param_init_net.ConstantFill(
        [], 'wd', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY
    )
    # weight decay of GroupNorm's parameters
    wd_gn = model.param_init_net.ConstantFill(
        [], 'wd_gn', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY_GN
    )
    for param in model.TrainableParams(gpu_id=gpu_id):
        logger.debug('param ' + str(param) + ' will be updated')
        param_grad = model.param_to_grad[param]
        # Initialize momentum vector
        param_momentum = model.param_init_net.ConstantFill(
            [param], param + '_momentum', value=0.0
        )
        if param in model.biases:
            # Special treatment for biases (mainly to match historical impl.
            # details):
            # (1) Do not apply weight decay
            # (2) Use a 2x higher learning rate
            model.Scale(param_grad, param_grad, scale=2.0)
        elif param in model.gn_params:
            # Special treatment for GroupNorm's parameters
            model.WeightedSum([param_grad, one, param, wd_gn], param_grad)
        elif cfg.SOLVER.WEIGHT_DECAY > 0:
            # Apply weight decay to non-bias weights
            model.WeightedSum([param_grad, one, param, wd], param_grad)
        # Update param_grad and param_momentum in place
        model.net.MomentumSGDUpdate(
            [param_grad, param_momentum, lr, param],
            [param_grad, param_momentum, param],
            momentum=cfg.SOLVER.MOMENTUM
        )


================================================
FILE: detectron/modeling/retinanet_heads.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""RetinaNet model heads and losses. See: https://arxiv.org/abs/1708.02002."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from detectron.core.config import cfg
import detectron.utils.blob as blob_utils


def get_retinanet_bias_init(model):
    """Initialize the biases for the conv ops that predict class probabilities.
    Initialization is performed such that at the start of training, all
    locations are predicted to be background with high probability
    (e.g., ~0.99 = 1 - cfg.RETINANET.PRIOR_PROB). See the Focal Loss paper for
    details.

    Returns an (operator name, kwargs dict) pair: 'GivenTensorFill' with
    explicit values in the softmax case, 'ConstantFill' with a single value in
    the sigmoid case.
    """
    prior_prob = cfg.RETINANET.PRIOR_PROB
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    if cfg.RETINANET.SOFTMAX:
        # Multiclass softmax case
        # Only the first entry (index 0) gets a non-zero bias
        bias = np.zeros((model.num_classes, 1), dtype=np.float32)
        bias[0] = np.log(
            (model.num_classes - 1) * (1 - prior_prob) / (prior_prob)
        )
        # Repeat the per-class bias once per (scale, aspect ratio) combination
        bias = np.vstack(
            [bias for _ in range(scales_per_octave * aspect_ratios)]
        )
        bias_init = (
            'GivenTensorFill', {
                'values': bias.astype(dtype=np.float32)
            }
        )
    else:
        # Per-class sigmoid (binary classification) case
        bias_init = (
            'ConstantFill', {
                'value': -np.log((1 - prior_prob) / prior_prob)
            }
        )
    return bias_init


def add_fpn_retinanet_outputs(model, blobs_in, dim_in, spatial_scales):
    """RetinaNet head. For classification and box regression, we can choose to
    have the same conv tower or a separate tower. "bbox_feat_list" stores the
    list of feature blobs for bbox prediction. These blobs can be shared cls
    feature blobs if we share the tower or else are independent blobs.

    `blobs_in` must hold one blob per pyramid level from cfg.FPN.RPN_MAX_LEVEL
    down to cfg.FPN.RPN_MIN_LEVEL (coarsest first). Weights are created at the
    finest level and shared with all other levels via ConvShared.
    `spatial_scales` is not read by this function.
    """
    dim_out = dim_in
    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    # Number of anchors per location
    A = len(cfg.RETINANET.ASPECT_RATIOS) * cfg.RETINANET.SCALES_PER_OCTAVE

    # compute init for bias
    bias_init = get_retinanet_bias_init(model)

    assert len(blobs_in) == k_max - k_min + 1
    bbox_feat_list = []
    cls_pred_dim = (
        model.num_classes if cfg.RETINANET.SOFTMAX else (model.num_classes - 1)
    )
    # unpacked bbox feature and add prediction layers
    bbox_regr_dim = (
        4 * (model.num_classes - 1) if cfg.RETINANET.CLASS_SPECIFIC_BBOX else 4
    )

    # ==========================================================================
    # classification tower with logits and prob prediction
    # ==========================================================================
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        # classification tower stack convolution starts
        for nconv in range(cfg.RETINANET.NUM_CONVS):
            suffix = 'n{}_fpn{}'.format(nconv, lvl)
            # No-op assignment: the tower keeps the channel count unchanged
            dim_in, dim_out = dim_in, dim_in
            if lvl == k_min:
                bl_out = model.Conv(
                    bl_in,
                    'retnet_cls_conv_' + suffix,
                    dim_in,
                    dim_out,
                    3,
                    stride=1,
                    pad=1,
                    weight_init=('GaussianFill', {
                        'std': 0.01
                    }),
                    bias_init=('ConstantFill', {
                        'value': 0.
                    })
                )
            else:
                bl_out = model.ConvShared(
                    bl_in,
                    'retnet_cls_conv_' + suffix,
                    dim_in,
                    dim_out,
                    3,
                    stride=1,
                    pad=1,
                    weight='retnet_cls_conv_n{}_fpn{}_w'.format(nconv, k_min),
                    bias='retnet_cls_conv_n{}_fpn{}_b'.format(nconv, k_min)
                )
            bl_in = model.Relu(bl_out, bl_out)
            bl_feat = bl_in
        # cls tower stack convolution ends. Add the logits layer now
        if lvl == k_min:
            retnet_cls_pred = model.Conv(
                bl_feat,
                'retnet_cls_pred_fpn{}'.format(lvl),
                dim_in,
                cls_pred_dim * A,
                3,
                pad=1,
                stride=1,
                weight_init=('GaussianFill', {
                    'std': 0.01
                }),
                bias_init=bias_init
            )
        else:
            retnet_cls_pred = model.ConvShared(
                bl_feat,
                'retnet_cls_pred_fpn{}'.format(lvl),
                dim_in,
                cls_pred_dim * A,
                3,
                pad=1,
                stride=1,
                weight='retnet_cls_pred_fpn{}_w'.format(k_min),
                bias='retnet_cls_pred_fpn{}_b'.format(k_min)
            )
        # Probability outputs are only added at inference time
        if not model.train:
            if cfg.RETINANET.SOFTMAX:
                model.net.GroupSpatialSoftmax(
                    retnet_cls_pred,
                    'retnet_cls_prob_fpn{}'.format(lvl),
                    num_classes=cls_pred_dim
                )
            else:
                model.net.Sigmoid(
                    retnet_cls_pred, 'retnet_cls_prob_fpn{}'.format(lvl)
                )
        if cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
            bbox_feat_list.append(bl_feat)

    # ==========================================================================
    # bbox tower if not sharing features with the classification tower with
    # logits and prob prediction
    # ==========================================================================
    if not cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
        for lvl in range(k_min, k_max + 1):
            bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
            for nconv in range(cfg.RETINANET.NUM_CONVS):
                suffix = 'n{}_fpn{}'.format(nconv, lvl)
                # No-op assignment: the tower keeps the channel count unchanged
                dim_in, dim_out = dim_in, dim_in
                if lvl == k_min:
                    bl_out = model.Conv(
                        bl_in,
                        'retnet_bbox_conv_' + suffix,
                        dim_in,
                        dim_out,
                        3,
                        stride=1,
                        pad=1,
                        weight_init=('GaussianFill', {
                            'std': 0.01
                        }),
                        bias_init=('ConstantFill', {
                            'value': 0.
                        })
                    )
                else:
                    bl_out = model.ConvShared(
                        bl_in,
                        'retnet_bbox_conv_' + suffix,
                        dim_in,
                        dim_out,
                        3,
                        stride=1,
                        pad=1,
                        weight='retnet_bbox_conv_n{}_fpn{}_w'.format(
                            nconv, k_min
                        ),
                        bias='retnet_bbox_conv_n{}_fpn{}_b'.format(
                            nconv, k_min
                        )
                    )
                bl_in = model.Relu(bl_out, bl_out)
                # Add octave scales and aspect ratio
                # At least 1 convolution for dealing different aspect ratios
                bl_feat = bl_in
            bbox_feat_list.append(bl_feat)
    # Depending on the features [shared/separate] for bbox, add prediction layer
    for i, lvl in enumerate(range(k_min, k_max + 1)):
        bbox_pred = 'retnet_bbox_pred_fpn{}'.format(lvl)
        bl_feat = bbox_feat_list[i]
        if lvl == k_min:
            model.Conv(
                bl_feat,
                bbox_pred,
                dim_in,
                bbox_regr_dim * A,
                3,
                pad=1,
                stride=1,
                weight_init=('GaussianFill', {
                    'std': 0.01
                }),
                bias_init=('ConstantFill', {
                    'value': 0.
                })
            )
        else:
            model.ConvShared(
                bl_feat,
                bbox_pred,
                dim_in,
                bbox_regr_dim * A,
                3,
                pad=1,
                stride=1,
                weight='retnet_bbox_pred_fpn{}_w'.format(k_min),
                bias='retnet_bbox_pred_fpn{}_b'.format(k_min)
            )


def add_fpn_retinanet_losses(model):
    """Add the RetinaNet bbox regression and focal classification losses.

    One bbox loss and one classification loss are added per pyramid level
    between cfg.FPN.RPN_MIN_LEVEL and cfg.FPN.RPN_MAX_LEVEL. The loss names are
    registered with model.AddLosses() and the loss gradients (as computed by
    blob_utils.get_loss_gradients) are returned.
    """
    loss_gradients = {}
    gradients, losses = [], []

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid

    model.AddMetrics(['retnet_fg_num', 'retnet_bg_num'])
    # ==========================================================================
    # bbox regression loss - SelectSmoothL1Loss for multiple anchors at a location
    # ==========================================================================
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        bbox_loss = model.net.SelectSmoothL1Loss(
            [
                'retnet_bbox_pred_' + suffix,
                'retnet_roi_bbox_targets_' + suffix,
                'retnet_roi_fg_bbox_locs_' + suffix, 'retnet_fg_num'
            ],
            'retnet_loss_bbox_' + suffix,
            beta=cfg.RETINANET.BBOX_REG_BETA,
            scale=model.GetLossScale() * cfg.RETINANET.BBOX_REG_WEIGHT
        )
        gradients.append(bbox_loss)
        losses.append('retnet_loss_bbox_' + suffix)

    # ==========================================================================
    # cls loss - depends on softmax/sigmoid outputs
    # ==========================================================================
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_lvl_logits = 'retnet_cls_pred_' + suffix
        if not cfg.RETINANET.SOFTMAX:
            cls_focal_loss = model.net.SigmoidFocalLoss(
                [
                    cls_lvl_logits, 'retnet_cls_labels_' + suffix,
                    'retnet_fg_num'
                ],
                ['fl_{}'.format(suffix)],
                gamma=cfg.RETINANET.LOSS_GAMMA,
                alpha=cfg.RETINANET.LOSS_ALPHA,
                scale=model.GetLossScale(),
                num_classes=model.num_classes - 1
            )
            gradients.append(cls_focal_loss)
            losses.append('fl_{}'.format(suffix))
        else:
            # gated_prob (second output) is not used further in this function
            cls_focal_loss, gated_prob = model.net.SoftmaxFocalLoss(
                [
                    cls_lvl_logits, 'retnet_cls_labels_' + suffix,
                    'retnet_fg_num'
                ],
                ['fl_{}'.format(suffix), 'retnet_prob_{}'.format(suffix)],
                gamma=cfg.RETINANET.LOSS_GAMMA,
                alpha=cfg.RETINANET.LOSS_ALPHA,
                scale=model.GetLossScale(),
                num_classes=model.num_classes
            )
            gradients.append(cls_focal_loss)
            losses.append('fl_{}'.format(suffix))

    loss_gradients.update(blob_utils.get_loss_gradients(model, gradients))
    model.AddLosses(losses)
    return loss_gradients


================================================
FILE: detectron/modeling/rfcn_heads.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add the R-FCN position-sensitive classification and box outputs.

    Args:
        model: model helper the ops are added to.
        blob_in: input feature blob.
        dim_in: number of channels in `blob_in`.
        dim_reduce: if not None, a 1x1 conv + ReLU first reduces the features
            to this many channels.
        spatial_scale: scale passed to the PSRoIPool ops.

    Output blobs created: 'conv_cls', 'conv_bbox_pred', 'cls_score',
    'bbox_pred' and, when the model is not in training mode, 'cls_prob'.
    """
    grid = cfg.RFCN.PS_GRID_SIZE
    cells = grid**2

    def conv1x1(src, dst, channels_in, channels_out):
        # Every conv in this head is a 1x1 conv with the same initializers.
        return model.Conv(
            src,
            dst,
            channels_in,
            channels_out,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )

    if dim_reduce is not None:
        # Optional dim reduction
        reduced = conv1x1(blob_in, 'conv_dim_reduce', dim_in, dim_reduce)
        blob_in = model.Relu(reduced, reduced)
        dim_in = dim_reduce

    # Classification conv
    conv1x1(blob_in, 'conv_cls', dim_in, model.num_classes * cells)

    # Bounding-box regression conv
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    conv1x1(
        blob_in, 'conv_bbox_pred', dim_in, 4 * num_bbox_reg_classes * cells
    )

    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'],
        ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=grid,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool('psroipooled_cls', 'cls_score_4d', kernel=grid)
    model.net.Reshape(
        'cls_score_4d',
        ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=grid,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool('psroipooled_bbox', 'bbox_pred', kernel=grid)
""" loss_gradients = None if cfg.FPN.FPN_ON: # Delegate to the FPN module FPN.add_fpn_rpn_outputs(model, blob_in, dim_in, spatial_scale_in) if cfg.MODEL.FASTER_RCNN: # CollectAndDistributeFpnRpnProposals also labels proposals when in # training mode model.CollectAndDistributeFpnRpnProposals() if model.train: loss_gradients = FPN.add_fpn_rpn_losses(model) else: # Not using FPN, add RPN to a single scale add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale_in) if model.train: loss_gradients = add_single_scale_rpn_losses(model) return loss_gradients def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale): """Add RPN outputs to a single scale model (i.e., no FPN).""" anchors = generate_anchors( stride=1. / spatial_scale, sizes=cfg.RPN.SIZES, aspect_ratios=cfg.RPN.ASPECT_RATIOS ) num_anchors = anchors.shape[0] dim_out = dim_in # RPN hidden representation model.Conv( blob_in, 'conv_rpn', dim_in, dim_out, kernel=3, pad=1, stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0) ) model.Relu('conv_rpn', 'conv_rpn') # Proposal classification scores model.Conv( 'conv_rpn', 'rpn_cls_logits', dim_in, num_anchors, kernel=1, pad=0, stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0) ) # Proposal bbox regression deltas model.Conv( 'conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors, kernel=1, pad=0, stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0) ) if not model.train or cfg.MODEL.FASTER_RCNN: # Proposals are needed during: # 1) inference (== not model.train) for RPN only and Faster R-CNN # OR # 2) training for Faster R-CNN # Otherwise (== training for RPN only), proposals are not needed model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs') model.GenerateProposals( ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'], ['rpn_rois', 'rpn_roi_probs'], anchors=anchors, spatial_scale=spatial_scale ) if cfg.MODEL.FASTER_RCNN: if model.train: # Add op that generates training labels for in-network RPN proposals 
model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info']) else: # Alias rois to rpn_rois for inference model.net.Alias('rpn_rois', 'rois') def add_single_scale_rpn_losses(model): """Add losses for a single scale RPN model (i.e., no FPN).""" # Spatially narrow the full-sized RPN label arrays to match the feature map # shape model.net.SpatialNarrowAs( ['rpn_labels_int32_wide', 'rpn_cls_logits'], 'rpn_labels_int32' ) for key in ('targets', 'inside_weights', 'outside_weights'): model.net.SpatialNarrowAs( ['rpn_bbox_' + key + '_wide', 'rpn_bbox_pred'], 'rpn_bbox_' + key ) loss_rpn_cls = model.net.SigmoidCrossEntropyLoss( ['rpn_cls_logits', 'rpn_labels_int32'], 'loss_rpn_cls', scale=model.GetLossScale() ) loss_rpn_bbox = model.net.SmoothL1Loss( [ 'rpn_bbox_pred', 'rpn_bbox_targets', 'rpn_bbox_inside_weights', 'rpn_bbox_outside_weights' ], 'loss_rpn_bbox', beta=1. / 9., scale=model.GetLossScale() ) loss_gradients = blob_utils.get_loss_gradients( model, [loss_rpn_cls, loss_rpn_bbox] ) model.AddLosses(['loss_rpn_cls', 'loss_rpn_bbox']) return loss_gradients ================================================ FILE: detectron/ops/__init__.py ================================================ ================================================ FILE: detectron/ops/collect_and_distribute_fpn_rpn_proposals.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
class CollectAndDistributeFpnRpnProposalsOp:
    """Python op that merges per-level RPN proposals and redistributes them.

    In training mode the merged proposals are additionally labeled by reusing
    the Fast R-CNN data loader code.
    """

    def __init__(self, train):
        self._train = train

    def forward(self, inputs, outputs):
        """See modeling.detector.CollectAndDistributeFpnRpnProposals for
        inputs/outputs documentation.
        """
        # inputs is
        # [rpn_rois_fpn2, ..., rpn_rois_fpn6,
        #  rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
        # If training with Faster R-CNN, then inputs will additionally include
        #  + [roidb, im_info]
        rois = collect(inputs, self._train)

        if not self._train:
            # For inference we have a special code path that avoids some data
            # loader overhead
            distribute(rois, None, outputs, self._train)
            return

        # During training we reuse the data loader code. We populate roidb
        # entries on the fly using the rois generated by RPN.
        # im_info: [[im_height, im_width, im_scale], ...]
        im_scales = inputs[-1].data[:, 2]
        roidb = blob_utils.deserialize(inputs[-2].data)
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals.
        # This choice should be investigated in the future (it likely does
        # not matter).
        json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
        roidb_utils.add_bbox_regression_targets(roidb)
        # Compute training labels for the RPN proposals; also handles
        # distributing the proposals over FPN levels
        blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names()
        blobs = dict((name, []) for name in blob_names)
        fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)
        for out_idx, name in enumerate(blob_names):
            blob_utils.py_op_copy_blob(blobs[name], outputs[out_idx])


def collect(inputs, is_training):
    """Merge RPN proposals from all FPN levels and keep the top-scoring ones.

    `inputs` holds one roi blob per level followed by one score blob per
    level; in training mode two extra trailing inputs are ignored here.
    The number of rois kept is cfg.{TRAIN,TEST}.RPN_POST_NMS_TOP_N.
    """
    mode_cfg = cfg['TRAIN' if is_training else 'TEST']
    post_nms_topN = mode_cfg.RPN_POST_NMS_TOP_N
    num_lvls = cfg.FPN.RPN_MAX_LEVEL - cfg.FPN.RPN_MIN_LEVEL + 1

    roi_blobs = inputs[:num_lvls]
    score_blobs = inputs[num_lvls:]
    if is_training:
        # Drop the two trailing non-score inputs
        score_blobs = score_blobs[:-2]

    # rois are in [[batch_idx, x0, y0, x1, y1], ...] format
    # Combine predictions across all levels and retain the top scoring
    all_rois = np.concatenate([blob.data for blob in roi_blobs])
    all_scores = np.concatenate([blob.data for blob in score_blobs]).squeeze()
    top_inds = np.argsort(-all_scores)[:post_nms_topN]
    return all_rois[top_inds, :]


def distribute(rois, label_blobs, outputs, train):
    """To understand the output blob order see return value of
    detectron.roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False)

    outputs[0] receives all rois, the following outputs receive the rois of
    each level from cfg.FPN.ROI_MIN_LEVEL to cfg.FPN.ROI_MAX_LEVEL, and the
    last output receives the permutation that restores the original roi
    order from the level-wise concatenation. `label_blobs` and `train` are
    not used.
    """
    lvl_min = cfg.FPN.ROI_MIN_LEVEL
    lvl_max = cfg.FPN.ROI_MAX_LEVEL
    roi_lvls = fpn.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max)

    outputs[0].reshape(rois.shape)
    outputs[0].data[...] = rois

    # Create new roi blobs for each FPN level
    # (See: modeling.FPN.add_multilevel_roi_blobs which is similar but annoying
    # to generalize to support this particular case.)
    idx_pieces = [np.empty((0, ))]
    for out_idx, lvl in enumerate(range(lvl_min, lvl_max + 1), 1):
        lvl_inds = np.where(roi_lvls == lvl)[0]
        lvl_rois = rois[lvl_inds, :]
        outputs[out_idx].reshape(lvl_rois.shape)
        outputs[out_idx].data[...] = lvl_rois
        idx_pieces.append(lvl_inds)

    rois_idx_restore = np.argsort(np.concatenate(idx_pieces))
    blob_utils.py_op_copy_blob(rois_idx_restore.astype(np.int32), outputs[-1])
rois = inputs[0].data roidb = blob_utils.deserialize(inputs[1].data) im_info = inputs[2].data im_scales = im_info[:, 2] output_blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names() # For historical consistency with the original Faster R-CNN # implementation we are *not* filtering crowd proposals. # This choice should be investigated in the future (it likely does # not matter). json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0) roidb_utils.add_bbox_regression_targets(roidb) blobs = {k: [] for k in output_blob_names} fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb) for i, k in enumerate(output_blob_names): blob_utils.py_op_copy_blob(blobs[k], outputs[i]) ================================================ FILE: detectron/ops/generate_proposals.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
class GenerateProposalsOp:
    """Output object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    `reg_weights` is forwarded unchanged to box_utils.bbox_transform; see
    detectron.utils.boxes for details about that optional parameter.
    """

    def __init__(self, anchors, spatial_scale, train, reg_weights=(1.0, 1.0, 1.0, 1.0)):
        self._anchors = anchors
        self._num_anchors = self._anchors.shape[0]
        self._feat_stride = 1. / spatial_scale
        self._train = train
        self._reg_weights = reg_weights

    def forward(self, inputs, outputs):
        """See modeling.detector.GenerateProposals for inputs/outputs
        documentation.
        """
        # 1. for each location i in a (H, W) grid:
        #      generate A anchor boxes centered on cell i
        #      apply predicted bbox deltas to each of the A anchors at cell i
        # 2. clip predicted boxes to image
        # 3. remove predicted boxes with either height or width < threshold
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take the top pre_nms_topN proposals before NMS
        # 6. apply NMS with a loose threshold (0.7) to the remaining proposals
        # 7. take after_nms_topN proposals after NMS
        # 8. return the top proposals

        # predicted probability of fg object for each RPN anchor
        scores = inputs[0].data
        # predicted anchor transformations
        bbox_deltas = inputs[1].data
        # input image (height, width, scale), in which scale is the scale
        # factor applied to the original dataset image to get the network
        # input image
        im_info = inputs[2].data

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]
        # Enumerate all shifted positions on the (H, W) grid
        grid_x, grid_y = np.meshgrid(
            np.arange(0, width) * self._feat_stride,
            np.arange(0, height) * self._feat_stride,
            copy=False
        )
        flat_x = grid_x.ravel()
        flat_y = grid_y.ravel()
        # Convert to (K, 4), K=H*W, where the columns are (dx, dy, dx, dy)
        # shift pointing to each grid location
        shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

        # Broadcast anchors over shifts to enumerate all anchors at all
        # positions in the (H, W) grid:
        #   - add A anchors of shape (1, A, 4) to
        #   - K shifts of shape (K, 1, 4) to get
        #   - all shifted anchors of shape (K, A, 4)
        #   - reshape to (K*A, 4) shifted anchors
        num_images = inputs[0].shape[0]
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = self._anchors[np.newaxis, :, :] + shifts[:, np.newaxis, :]
        all_anchors = all_anchors.reshape((K * A, 4))

        # Seed with empty float32 arrays so an empty batch still yields
        # (0, 5) / (0, 1) outputs, then join everything in one pass.
        roi_pieces = [np.empty((0, 5), dtype=np.float32)]
        prob_pieces = [np.empty((0, 1), dtype=np.float32)]
        for im_i in range(num_images):
            im_boxes, im_probs = self.proposals_for_one_image(
                im_info[im_i, :], all_anchors, bbox_deltas[im_i, :, :, :],
                scores[im_i, :, :, :]
            )
            # First roi column is the index of the image within the batch
            batch_col = np.full((im_boxes.shape[0], 1), im_i, dtype=np.float32)
            roi_pieces.append(np.hstack((batch_col, im_boxes)))
            prob_pieces.append(im_probs)
        rois = np.concatenate(roi_pieces, axis=0)
        roi_probs = np.concatenate(prob_pieces, axis=0)

        outputs[0].reshape(rois.shape)
        outputs[0].data[...] = rois
        if len(outputs) > 1:
            outputs[1].reshape(roi_probs.shape)
            outputs[1].data[...] = roi_probs

    def proposals_for_one_image(
        self, im_info, all_anchors, bbox_deltas, scores
    ):
        """Turn one image's RPN outputs into (proposals, scores).

        Settings are read from cfg.TRAIN or cfg.TEST depending on the mode
        the op was constructed with.
        """
        # Get mode-dependent configuration
        mode_cfg = cfg['TRAIN' if self._train else 'TEST']
        pre_nms_topN = mode_cfg.RPN_PRE_NMS_TOP_N
        post_nms_topN = mode_cfg.RPN_POST_NMS_TOP_N
        nms_thresh = mode_cfg.RPN_NMS_THRESH
        min_size = mode_cfg.RPN_MIN_SIZE

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        neg_scores = -scores.squeeze()
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(neg_scores)
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            top_inds = np.argpartition(neg_scores, pre_nms_topN)[:pre_nms_topN]
            order = top_inds[np.argsort(neg_scores[top_inds])]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(
            all_anchors, bbox_deltas, self._reg_weights
        )

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores


def _filter_boxes(boxes, min_size, im_info):
    """Only keep boxes with both sides >= min_size and center within the
    image. Returns the indices of the boxes that are kept.
    """
    im_height, im_width, im_scale = im_info[0], im_info[1], im_info[2]

    # Compute the width and height of the proposal boxes as measured in the
    # original image coordinate system (this is required to avoid "Negative
    # Areas Found" assertions in other parts of the code that measure).
    ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1
    hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1
    # To avoid numerical issues we require the min_size to be at least 1
    # pixel in the original image
    min_size = np.maximum(min_size, 1)

    # Proposal center is computed relative to the scaled input image
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    x_ctr = boxes[:, 0] + ws / 2.
    y_ctr = boxes[:, 1] + hs / 2.

    large_enough = (ws_orig_scale >= min_size) & (hs_orig_scale >= min_size)
    center_inside = (x_ctr < im_width) & (y_ctr < im_height)
    return np.where(large_enough & center_inside)[0]
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "zero_even_op.h" namespace caffe2 { template <> bool ZeroEvenOp::RunOnDevice() { // Retrieve the input tensor. const auto& X = Input(0); CAFFE_ENFORCE(X.dim() == 1); // Initialize the output tensor to a copy of the input tensor. auto* Y = Output(0); Y->CopyFrom(X); // Set output elements at even indices to zero. auto* Y_data = Y->mutable_data(); for (auto i = 0; i < Y->numel(); i += 2) { Y_data[i] = 0.0f; } return true; } REGISTER_CPU_OPERATOR(ZeroEven, ZeroEvenOp); OPERATOR_SCHEMA(ZeroEven) .NumInputs(1) .NumOutputs(1) .Input( 0, "X", "1D input tensor") .Output( 0, "Y", "1D output tensor"); } // namespace caffe2 ================================================ FILE: detectron/ops/zero_even_op.cu ================================================ /** * Copyright (c) 2016-present, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
#include "caffe2/core/context_gpu.h"

#include "zero_even_op.h"

namespace caffe2 {

namespace {

// Writes `val` to data[0], data[2], ..., i.e. to the first `num_even_inds`
// even positions of `data`.
template <typename T>
__global__ void SetEvenIndsToVal(size_t num_even_inds, T val, T* data) {
  CUDA_1D_KERNEL_LOOP(i, num_even_inds) {
    data[i << 1] = val;
  }
}

} // namespace

// CUDA implementation of ZeroEven: copies the 1D input X into the output Y
// and then overwrites every even-indexed element of Y with zero.
template <>
bool ZeroEvenOp<float, CUDAContext>::RunOnDevice() {
  // Retrieve the input tensor; only 1D tensors are accepted.
  // dim()/numel() are used to match the CPU implementation of this op.
  const auto& X = Input(0);
  CAFFE_ENFORCE(X.dim() == 1);

  // Initialize the output tensor to a copy of the input tensor.
  auto* Y = Output(0);
  Y->CopyFrom(X);

  // Set output elements at even indices to zero.
  auto output_size = Y->numel();

  // Skip the launch entirely for an empty tensor.
  if (output_size > 0) {
    // ceil(output_size / 2): number of even indices in [0, output_size)
    size_t num_even_inds = output_size / 2 + output_size % 2;
    SetEvenIndsToVal<float>
        <<<CAFFE_GET_BLOCKS(num_even_inds),
           CAFFE_CUDA_NUM_THREADS,
           0,
           context_.cuda_stream()>>>(
            num_even_inds, 0.0f, Y->mutable_data<float>());
  }

  return true;
}

REGISTER_CUDA_OPERATOR(ZeroEven, ZeroEvenOp<float, CUDAContext>);

} // namespace caffe2
*/ template class ZeroEvenOp final : public Operator { public: // Introduce Operator helper members. USE_OPERATOR_CONTEXT_FUNCTIONS; ZeroEvenOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws) {} bool RunOnDevice() override; }; } // namespace caffe2 #endif // ZERO_EVEN_OP_H_ ================================================ FILE: detectron/roi_data/__init__.py ================================================ ================================================ FILE: detectron/roi_data/data_utils.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Common utility functions for RPN and RetinaNet minibtach blobs preparation. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from collections import namedtuple import logging import numpy as np import threading from detectron.core.config import cfg from detectron.modeling.generate_anchors import generate_anchors import detectron.utils.boxes as box_utils logger = logging.getLogger(__name__) # octave and aspect fields are only used on RetinaNet. 
# octave and aspect fields are only used on RetinaNet. Octave corresponds to
# the scale of the anchor and aspect denotes which aspect ratio is used in the
# range of aspect ratios
FieldOfAnchors = namedtuple(
    'FieldOfAnchors', [
        'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size',
        'octave', 'aspect'
    ]
)

# Per-thread cache for memoizing get_field_of_anchors
_threadlocal_foa = threading.local()


def get_field_of_anchors(
    stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None
):
    """Return the (memoized) FieldOfAnchors for one anchor configuration.

    The field enumerates the cell anchors produced by generate_anchors() at
    every position of a square grid with `field_size` cells per side, where
    field_size is derived from cfg.TRAIN.MAX_SIZE, cfg.FPN.COARSEST_STRIDE
    and `stride`.

    Args:
        stride: distance between neighbouring grid positions.
        anchor_sizes: anchor sizes forwarded to generate_anchors().
        anchor_aspect_ratios: aspect ratios forwarded to generate_anchors().
        octave, aspect: stored unchanged in the returned tuple.

    Returns:
        FieldOfAnchors whose `field_of_anchors` is a float32 array of shape
        (K * A, 4) for K grid positions and A cell anchors.
    """
    if not hasattr(_threadlocal_foa, 'cache'):
        _threadlocal_foa.cache = {}
    cache = _threadlocal_foa.cache

    # octave and aspect are stored in the result, so they must be part of the
    # key; otherwise a call differing only in them would get back a tuple
    # carrying the first caller's values. A tuple of the individual strings
    # also keeps the fields from running into each other.
    cache_key = (
        str(stride), str(anchor_sizes), str(anchor_aspect_ratios),
        str(octave), str(aspect)
    )
    if cache_key in cache:
        return cache[cache_key]

    # Anchors at a single feature cell
    cell_anchors = generate_anchors(
        stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios
    )
    num_cell_anchors = cell_anchors.shape[0]

    # Generate canonical proposals from shifted anchors
    # Enumerate all shifted positions on the (H, W) grid
    fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil(
        cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE)
    )
    field_size = int(np.ceil(fpn_max_size / float(stride)))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.ravel()
    shift_y = shift_y.ravel()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()

    # Broadcast anchors over shifts to enumerate all anchors at all positions
    # in the (H, W) grid:
    #   - add A cell anchors of shape (1, A, 4) to
    #   - K shifts of shape (K, 1, 4) to get
    #   - all shifted anchors of shape (K, A, 4)
    #   - reshape to (K*A, 4) shifted anchors
    A = num_cell_anchors
    K = shifts.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    )
    field_of_anchors = field_of_anchors.reshape((K * A, 4))
    foa = FieldOfAnchors(
        field_of_anchors=field_of_anchors.astype(np.float32),
        num_cell_anchors=num_cell_anchors,
        stride=stride,
        field_size=field_size,
        octave=octave,
        aspect=aspect
    )
    cache[cache_key] = foa
    return foa


def unmap(data, count, inds, fill=0):
    """Unmap a subset of item (data) back to the original set of items (of
    size count).

    Rows of `data` are written to positions `inds` of a new array with
    `count` rows; all other rows are set to `fill`. When `inds` already
    covers every row (count == len(inds)) `data` itself is returned.
    """
    if count == len(inds):
        return data

    # data.shape[1:] is empty for 1-D input, so one code path covers both the
    # 1-D and the N-D case.
    ret = np.empty((count, ) + data.shape[1:], dtype=data.dtype)
    ret.fill(fill)
    ret[inds] = data
    return ret


def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Compute bounding-box regression targets for an image.

    Thin wrapper around box_utils.bbox_transform_inv that returns the result
    as float32 (without copying when it already is float32).
    """
    return box_utils.bbox_transform_inv(ex_rois, gt_rois, weights).astype(
        np.float32, copy=False
    )
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np
import numpy.random as npr

from detectron.core.config import cfg
import detectron.modeling.FPN as fpn
import detectron.roi_data.keypoint_rcnn as keypoint_rcnn_roi_data
import detectron.roi_data.mask_rcnn as mask_rcnn_roi_data
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils

logger = logging.getLogger(__name__)


def get_fast_rcnn_blob_names(is_training=True):
    """Return the list of Fast R-CNN blob names.

    Training-only blobs (labels, bbox targets/weights and, depending on cfg,
    mask / keypoint blobs) are included only when is_training is True. The
    per-FPN-level roi blobs are added whenever cfg.FPN.FPN_ON and
    cfg.FPN.MULTILEVEL_ROIS are both set.
    """
    # rois blob: holds R regions of interest, each is a 5-tuple
    # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a
    # rectangle (x1, y1, x2, y2)
    blob_names = ['rois']
    if is_training:
        # labels_int32 blob: R categorical labels in [0, ..., K] for K
        # foreground classes plus background
        blob_names += ['labels_int32']
    if is_training:
        # bbox_targets blob: R bounding-box regression targets with 4
        # targets per class
        blob_names += ['bbox_targets']
        # bbox_inside_weights blob: At most 4 targets per roi are active
        # this binary vector specifies the subset of active targets
        blob_names += ['bbox_inside_weights']
        blob_names += ['bbox_outside_weights']
    if is_training and cfg.MODEL.MASK_ON:
        # 'mask_rois': RoIs sampled for training the mask prediction branch.
        # Shape is (#masks, 5) in format (batch_idx, x1, y1, x2, y2).
        blob_names += ['mask_rois']
        # 'roi_has_mask': binary labels for the RoIs specified in 'rois'
        # indicating if each RoI has a mask or not. Note that in some cases
        # a *bg* RoI will have an all -1 (ignore) mask associated with it in
        # the case that no fg RoIs can be sampled. Shape is (batchsize).
        blob_names += ['roi_has_mask_int32']
        # 'masks_int32' holds binary masks for the RoIs specified in
        # 'mask_rois'. Shape is (#fg, M * M) where M is the ground truth
        # mask size.
        blob_names += ['masks_int32']
    if is_training and cfg.MODEL.KEYPOINTS_ON:
        # 'keypoint_rois': RoIs sampled for training the keypoint prediction
        # branch. Shape is (#instances, 5) in format (batch_idx, x1, y1, x2,
        # y2).
        blob_names += ['keypoint_rois']
        # 'keypoint_locations_int32': index of keypoint in
        # KRCNN.HEATMAP_SIZE**2 sized array. Shape is (#instances). Used in
        # SoftmaxWithLoss.
        blob_names += ['keypoint_locations_int32']
        # 'keypoint_weights': weight assigned to each target in
        # 'keypoint_locations_int32'. Shape is (#instances). Used in
        # SoftmaxWithLoss.
        blob_names += ['keypoint_weights']
        # 'keypoint_loss_normalizer': optional normalization factor to use if
        # cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.
        blob_names += ['keypoint_loss_normalizer']
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        # Support for FPN multi-level rois without bbox reg isn't
        # implemented (... and may never be implemented)
        k_max = cfg.FPN.ROI_MAX_LEVEL
        k_min = cfg.FPN.ROI_MIN_LEVEL
        # Same format as rois blob, but one per FPN level
        for lvl in range(k_min, k_max + 1):
            blob_names += ['rois_fpn' + str(lvl)]
        blob_names += ['rois_idx_restore_int32']
        if is_training:
            if cfg.MODEL.MASK_ON:
                for lvl in range(k_min, k_max + 1):
                    blob_names += ['mask_rois_fpn' + str(lvl)]
                blob_names += ['mask_rois_idx_restore_int32']
            if cfg.MODEL.KEYPOINTS_ON:
                for lvl in range(k_min, k_max + 1):
                    blob_names += ['keypoint_rois_fpn' + str(lvl)]
                blob_names += ['keypoint_rois_idx_restore_int32']
    return blob_names


def add_fast_rcnn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training Fast R-CNN style models.

    `blobs` is modified in place: per-image samples are appended to its list
    values, and every non-empty list is then replaced by the concatenated
    array. Returns the `valid` flag, which is True unless keypoints are
    enabled and finalize_keypoint_minibatch() rejects the minibatch.
    """
    # Sample training RoIs from each image and append them to the blob lists
    for im_i, entry in enumerate(roidb):
        frcn_blobs = _sample_rois(entry, im_scales[im_i], im_i)
        for k, v in frcn_blobs.items():
            blobs[k].append(v)
    # Concat the training blob lists into tensors
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)
    # Add FPN multilevel training RoIs, if configured
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois(blobs)

    # Perform any final work and validity checks after the collating blobs for
    # all minibatch images
    valid = True
    if cfg.MODEL.KEYPOINTS_ON:
        valid = keypoint_rcnn_roi_data.finalize_keypoint_minibatch(blobs, valid)

    return valid


def _sample_rois(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    `roidb` is the roidb entry of a single image. Returns a dict holding the
    base Fast R-CNN blobs for that image (labels_int32, rois, bbox_targets,
    bbox_inside_weights, bbox_outside_weights) plus any mask / keypoint blobs
    added by the respective roi_data modules.
    """
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))
    max_overlaps = roidb['max_overlaps']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(
            fg_inds, size=fg_rois_per_this_image, replace=False
        )

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where(
        (max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
        (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)
    )[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(
            bg_inds, size=bg_rois_per_this_image, replace=False
        )

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Label is the class each RoI has max overlap with
    sampled_labels = roidb['max_classes'][keep_inds]
    # fg indices come first in keep_inds, so everything after them is bg
    sampled_labels[fg_rois_per_this_image:] = 0  # Label bg RoIs with class 0
    sampled_boxes = roidb['boxes'][keep_inds]

    bbox_targets, bbox_inside_weights = _expand_bbox_targets(
        roidb['bbox_targets'][keep_inds, :]
    )
    bbox_outside_weights = np.array(
        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype
    )

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois = sampled_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Fast R-CNN blobs
    blob_dict = dict(
        labels_int32=sampled_labels.astype(np.int32, copy=False),
        rois=sampled_rois,
        bbox_targets=bbox_targets,
        bbox_inside_weights=bbox_inside_weights,
        bbox_outside_weights=bbox_outside_weights
    )

    # Optionally add Mask R-CNN blobs
    if cfg.MODEL.MASK_ON:
        mask_rcnn_roi_data.add_mask_rcnn_blobs(
            blob_dict, sampled_boxes, roidb, im_scale, batch_idx
        )

    # Optionally add Keypoint R-CNN blobs
    if cfg.MODEL.KEYPOINTS_ON:
        keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(
            blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
        )

    return blob_dict


def _expand_bbox_targets(bbox_target_data):
    """Bounding-box regression targets are stored in a compact form in the
    roidb.

    This function expands those targets into the 4-of-4*K representation used
    by the network (i.e. only one class has non-zero targets). The loss weights
    are similarly expanded.

    Column 0 of bbox_target_data is read as the class index and columns 1:
    as the four regression targets.

    Returns:
        bbox_target_data (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    num_bbox_reg_classes = cfg.MODEL.NUM_CLASSES
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2  # bg and fg

    clss = bbox_target_data[:, 0]
    bbox_targets = blob_utils.zeros((clss.size, 4 * num_bbox_reg_classes))
    bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)
    # Only rows with a foreground class (> 0) receive targets and weights
    inds = np.where(clss > 0)[0]
    for ind in inds:
        cls = int(clss[ind])
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights


def _add_multilevel_rois(blobs):
    """By default training RoIs are added for a single feature map level only.
    When using FPN, the RoIs must be distributed over different FPN levels
    according the level assignment heuristic (see: modeling.FPN.
    map_rois_to_fpn_levels).
    """
    lvl_min = cfg.FPN.ROI_MIN_LEVEL
    lvl_max = cfg.FPN.ROI_MAX_LEVEL

    def _distribute_rois_over_fpn_levels(rois_blob_name):
        """Distribute rois over the different FPN levels."""
        # Get target level for each roi
        # Recall blob rois are in (batch_idx, x1, y1, x2, y2) format, hence take
        # the box coordinates from columns 1:5
        target_lvls = fpn.map_rois_to_fpn_levels(
            blobs[rois_blob_name][:, 1:5], lvl_min, lvl_max
        )
        # Add per FPN level roi blobs named like: <rois_blob_name>_fpn<lvl>
        fpn.add_multilevel_roi_blobs(
            blobs, rois_blob_name, blobs[rois_blob_name], target_lvls, lvl_min,
            lvl_max
        )

    _distribute_rois_over_fpn_levels('rois')
    if cfg.MODEL.MASK_ON:
        _distribute_rois_over_fpn_levels('mask_rois')
    if cfg.MODEL.KEYPOINTS_ON:
        _distribute_rois_over_fpn_levels('keypoint_rois')

================================================
FILE: detectron/roi_data/keypoint_rcnn.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Construct minibatches for Mask R-CNN training when keypoints are enabled.

Handles the minibatch blobs that are specific to training Mask R-CNN for
keypoint detection. Other blobs that are generic to RPN or Fast/er R-CNN are
handled by their respective roi_data modules.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np

from detectron.core.config import cfg
import detectron.utils.blob as blob_utils
import detectron.utils.keypoints as keypoint_utils

logger = logging.getLogger(__name__)


def add_keypoint_rcnn_blobs(
    blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.

    Writes 'keypoint_rois', 'keypoint_locations_int32' and 'keypoint_weights'
    into `blobs`. Foreground RoIs are re-selected here from
    roidb['max_overlaps'] with the extra requirement that at least one
    visible keypoint lies inside the box; the `fg_inds` argument is not read.
    """
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']

    # Keypoints of the gt instance each box is mapped to
    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    # Row 2 of the keypoint array is treated as the visibility flag
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible)
    )[0]

    # Subsample (without replacement) only if there are too many candidates
    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False
        )

    sampled_fg_rois = roidb['boxes'][kp_fg_inds]
    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

    num_keypoints = gt_keypoints.shape[2]
    # Initialised to -1; rows are overwritten for boxes mapped to a gt instance
    sampled_keypoints = -np.ones(
        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
        dtype=gt_keypoints.dtype
    )
    for ii in range(len(sampled_fg_rois)):
        ind = box_to_gt_ind_map[ii]
        if ind >= 0:
            sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
            assert np.sum(sampled_keypoints[ii, 2, :]) > 0

    heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
        sampled_keypoints, sampled_fg_rois
    )

    # Flatten to one row per (roi, keypoint) pair
    shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
    heats = heats.reshape(shape)
    weights = weights.reshape(shape)

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1)
    )
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights


def finalize_keypoint_minibatch(blobs, valid):
    """Finalize the minibatch after blobs for all minibatch images have been
    collated.

    Stores 'keypoint_loss_normalizer' in `blobs` and returns the updated
    validity flag: the minibatch stays valid only if `valid` was already
    truthy, 'keypoint_weights' is non-empty and the summed keypoint weights
    exceed cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH.
    """
    min_count = cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH
    num_visible_keypoints = np.sum(blobs['keypoint_weights'])
    valid = (
        valid and len(blobs['keypoint_weights']) > 0 and
        num_visible_keypoints > min_count
    )
    # Normalizer to use if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.
    # See modeling.model_builder.add_keypoint_losses
    norm = num_visible_keypoints / (
        cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM *
        cfg.TRAIN.FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS
    )
    blobs['keypoint_loss_normalizer'] = np.array(norm, dtype=np.float32)
    return valid


def _within_box(points, boxes):
    """Validate which keypoints are contained inside a given box.

    Box edges are inclusive.

    points: NxCxK; only rows 0 and 1 of the second axis are read, as the x
        and y coordinates (the caller in this module passes an array that
        also has a row 2)
    boxes: Nx4, columns read as (x1, y1, x2, y2)
    output: NxK
    """
    x_within = np.logical_and(
        points[:, 0, :] >= np.expand_dims(boxes[:, 0], axis=1),
        points[:, 0, :] <= np.expand_dims(boxes[:, 2], axis=1)
    )
    y_within = np.logical_and(
        points[:, 1, :] >= np.expand_dims(boxes[:, 1], axis=1),
        points[:, 1, :] <= np.expand_dims(boxes[:, 3], axis=1)
    )
    return np.logical_and(x_within, y_within)

================================================
FILE: detectron/roi_data/loader.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Detectron data loader. The design is generic and abstracted away from any details of the minibatch. A minibatch is a dictionary of blob name keys and their associated numpy (float32 or int32) ndarray values. Outline of the data loader design: loader thread\ loader thread \ / GPU 1 enqueue thread -> feed -> EnqueueOp ... -> minibatch queue -> ... loader thread / \ GPU N enqueue thread -> feed -> EnqueueOp loader thread/ <---------------------------- CPU -----------------------------|---- GPU ----> A pool of loader threads construct minibatches that are put onto the shared minibatch queue. Each GPU has an enqueue thread that pulls a minibatch off the minibatch queue, feeds the minibatch blobs into the workspace, and then runs an EnqueueBlobsOp to place the minibatch blobs into the GPU's blobs queue. During each fprop the first thing the network does is run a DequeueBlobsOp in order to populate the workspace with the blobs from a queued minibatch. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from collections import deque from collections import OrderedDict import logging import numpy as np import signal import threading import time import uuid from six.moves import queue as Queue from caffe2.python import core, workspace from detectron.core.config import cfg from detectron.roi_data.minibatch import get_minibatch from detectron.roi_data.minibatch import get_minibatch_blob_names from detectron.utils.coordinator import coordinated_get from detectron.utils.coordinator import coordinated_put from detectron.utils.coordinator import Coordinator import detectron.utils.c2 as c2_utils logger = logging.getLogger(__name__) class RoIDataLoader: def __init__( self, roidb, num_loaders=4, minibatch_queue_size=64, blobs_queue_capacity=8 ): self._roidb = roidb self._lock = threading.Lock() self._perm = deque(range(len(self._roidb))) self._cur = 0 # _perm cursor # The minibatch queue holds prepared training data in host (CPU) memory # When training with N > 1 GPUs, each element in the minibatch queue # is actually a partial minibatch which contributes 1 / N of the # examples to the overall minibatch self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size) self._blobs_queue_capacity = blobs_queue_capacity # Random queue name in case one instantiates multple RoIDataLoaders self._loader_id = uuid.uuid4() self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id) # Loader threads construct (partial) minibatches and put them on the # minibatch queue self._num_loaders = num_loaders self._num_gpus = cfg.NUM_GPUS self.coordinator = Coordinator() self._output_names = get_minibatch_blob_names() self._shuffle_roidb_inds() self.create_threads() def minibatch_loader_thread(self): """Load mini-batches and put them onto the mini-batch queue.""" with self.coordinator.stop_on_exception(): while not 
self.coordinator.should_stop(): blobs = self.get_next_minibatch() # Blobs must be queued in the order specified by # self.get_output_names ordered_blobs = OrderedDict() for key in self.get_output_names(): assert blobs[key].dtype in (np.int32, np.float32), \ 'Blob {} of dtype {} must have dtype of ' \ 'np.int32 or np.float32'.format(key, blobs[key].dtype) ordered_blobs[key] = blobs[key] coordinated_put( self.coordinator, self._minibatch_queue, ordered_blobs ) logger.info('Stopping mini-batch loading thread') def enqueue_blobs_thread(self, gpu_id, blob_names): """Transfer mini-batches from a mini-batch queue to a BlobsQueue.""" with self.coordinator.stop_on_exception(): while not self.coordinator.should_stop(): if self._minibatch_queue.qsize == 0: logger.warning('Mini-batch queue is empty') blobs = coordinated_get(self.coordinator, self._minibatch_queue) self.enqueue_blobs(gpu_id, blob_names, blobs.values()) logger.debug( 'batch queue size {}'.format(self._minibatch_queue.qsize()) ) logger.info('Stopping enqueue thread') def get_next_minibatch(self): """Return the blobs to be used for the next minibatch. Thread safe.""" valid = False while not valid: db_inds = self._get_next_minibatch_inds() minibatch_db = [self._roidb[i] for i in db_inds] blobs, valid = get_minibatch(minibatch_db) return blobs def _shuffle_roidb_inds(self): """Randomly permute the training roidb. 
Not thread safe.""" if cfg.TRAIN.ASPECT_GROUPING: widths = np.array([r['width'] for r in self._roidb]) heights = np.array([r['height'] for r in self._roidb]) horz = (widths >= heights) vert = np.logical_not(horz) horz_inds = np.where(horz)[0] vert_inds = np.where(vert)[0] horz_inds = np.random.permutation(horz_inds) vert_inds = np.random.permutation(vert_inds) mb = cfg.TRAIN.IMS_PER_BATCH horz_inds = horz_inds[:(len(horz_inds) // mb) * mb] vert_inds = vert_inds[:(len(vert_inds) // mb) * mb] inds = np.hstack((horz_inds, vert_inds)) inds = np.reshape(inds, (-1, mb)) row_perm = np.random.permutation(np.arange(inds.shape[0])) inds = np.reshape(inds[row_perm, :], (-1, )) self._perm = inds else: self._perm = np.random.permutation(np.arange(len(self._roidb))) self._perm = deque(self._perm) self._cur = 0 def _get_next_minibatch_inds(self): """Return the roidb indices for the next minibatch. Thread safe.""" with self._lock: # We use a deque and always take the *first* IMS_PER_BATCH items # followed by *rotating* the deque so that we see fresh items # each time. If the length of _perm is not divisible by # IMS_PER_BATCH, then we end up wrapping around the permutation. db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)] self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH) self._cur += cfg.TRAIN.IMS_PER_BATCH if self._cur >= len(self._perm): self._shuffle_roidb_inds() return db_inds def get_output_names(self): return self._output_names def enqueue_blobs(self, gpu_id, blob_names, blobs): """Put a mini-batch on a BlobsQueue.""" assert len(blob_names) == len(blobs) t = time.time() dev = c2_utils.CudaDevice(gpu_id) queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name) blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names] for (blob_name, blob) in zip(blob_names, blobs): workspace.FeedBlob(blob_name, blob, device_option=dev) logger.debug( 'enqueue_blobs {}: workspace.FeedBlob: {}'. 
format(gpu_id, time.time() - t) ) t = time.time() op = core.CreateOperator( 'SafeEnqueueBlobs', [queue_name] + blob_names, blob_names + [queue_name + '_enqueue_status'], device_option=dev ) workspace.RunOperatorOnce(op) logger.debug( 'enqueue_blobs {}: workspace.RunOperatorOnce: {}'. format(gpu_id, time.time() - t) ) def create_threads(self): # Create mini-batch loader threads, each of which builds mini-batches # and places them into a queue in CPU memory self._workers = [ threading.Thread(target=self.minibatch_loader_thread) for _ in range(self._num_loaders) ] # Create one BlobsQueue per GPU # (enqueue_blob_names are unscoped) enqueue_blob_names = self.create_blobs_queues() # Create one enqueuer thread per GPU self._enqueuers = [ threading.Thread( target=self.enqueue_blobs_thread, args=(gpu_id, enqueue_blob_names) ) for gpu_id in range(self._num_gpus) ] def start(self, prefill=False): for w in self._workers + self._enqueuers: w.setDaemon(True) w.start() if prefill: logger.info('Pre-filling mini-batch queue...') while not self._minibatch_queue.full(): logger.info( ' [{:d}/{:d}]'.format( self._minibatch_queue.qsize(), self._minibatch_queue.maxsize ) ) time.sleep(0.1) # Detect failure and shutdown if self.coordinator.should_stop(): self.shutdown() break def has_stopped(self): return self.coordinator.should_stop() def shutdown(self): self.coordinator.request_stop() self.coordinator.wait_for_stop() self.close_blobs_queues() for w in self._workers + self._enqueuers: w.join() def create_blobs_queues(self): """Create one BlobsQueue for each GPU to hold mini-batches.""" for gpu_id in range(self._num_gpus): with c2_utils.GpuNameScope(gpu_id): workspace.RunOperatorOnce( core.CreateOperator( 'CreateBlobsQueue', [], [self._blobs_queue_name], num_blobs=len(self.get_output_names()), capacity=self._blobs_queue_capacity ) ) return self.create_enqueue_blobs() def close_blobs_queues(self): """Close a BlobsQueue.""" for gpu_id in range(self._num_gpus): with 
core.NameScope('gpu_{}'.format(gpu_id)): workspace.RunOperatorOnce( core.CreateOperator( 'CloseBlobsQueue', [self._blobs_queue_name], [] ) ) def create_enqueue_blobs(self): blob_names = self.get_output_names() enqueue_blob_names = [ '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names ] for gpu_id in range(self._num_gpus): with c2_utils.NamedCudaScope(gpu_id): for blob in enqueue_blob_names: workspace.CreateBlob(core.ScopedName(blob)) return enqueue_blob_names def register_sigint_handler(self): def signal_handler(signal, frame): logger.info( 'SIGINT: Shutting down RoIDataLoader threads and exiting...' ) self.shutdown() signal.signal(signal.SIGINT, signal_handler) ================================================ FILE: detectron/roi_data/mask_rcnn.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Construct minibatches for Mask R-CNN training. Handles the minibatch blobs that are specific to Mask R-CNN. Other blobs that are generic to RPN or Fast/er R-CNN are handled by their respecitive roi_data modules. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import logging import numpy as np from detectron.core.config import cfg import detectron.utils.blob as blob_utils import detectron.utils.boxes as box_utils import detectron.utils.segms as segm_utils logger = logging.getLogger(__name__) def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): """Add Mask R-CNN specific blobs to the input blob dictionary.""" # Prepare the mask targets by associating one gt mask to each training roi # that has a fg (non-bg) class label. M = cfg.MRCNN.RESOLUTION polys_gt_inds = np.where( (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0) )[0] polys_gt = [roidb['segms'][i] for i in polys_gt_inds] boxes_from_polys = segm_utils.polys_to_boxes(polys_gt) fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_mask = blobs['labels_int32'].copy() roi_has_mask[roi_has_mask > 0] = 1 if fg_inds.shape[0] > 0: # Class labels for the foreground rois mask_class_labels = blobs['labels_int32'][fg_inds] masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) # Find overlap between all foreground rois and the bounding boxes # enclosing each segmentation rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False) ) # Map from each fg rois to the index of the mask with highest overlap # (measured by bbox overlap) fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) # add fg targets for i in range(rois_fg.shape[0]): fg_polys_ind = fg_polys_inds[i] poly_gt = polys_gt[fg_polys_ind] roi_fg = rois_fg[i] # Rasterize the portion of the polygon mask within the given fg roi # to an M x M binary image mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M) mask = np.array(mask > 0, dtype=np.int32) # Ensure it's binary masks[i, :] = np.reshape(mask, M**2) else: # If there 
are no fg masks (it does happen) # The network cannot handle empty blobs, so we must provide a mask # We simply take the first bg roi, given it an all -1's mask (ignore # label), and label it with class zero (bg). bg_inds = np.where(blobs['labels_int32'] == 0)[0] # rois_fg is actually one background roi, but that's ok because ... rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) # We give it an -1's blob (ignore label) masks = -blob_utils.ones((1, M**2), int32=True) # We label it with class = 0 (background) mask_class_labels = blob_utils.zeros((1, )) # Mark that the first roi has a mask roi_has_mask[0] = 1 if cfg.MRCNN.CLS_SPECIFIC_MASK: masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels) # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # Update blobs dict with Mask R-CNN blobs blobs['mask_rois'] = rois_fg blobs['roi_has_mask_int32'] = roi_has_mask blobs['masks_int32'] = masks def _expand_to_class_specific_mask_targets(masks, mask_class_labels): """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2) to encode class specific mask targets. """ assert masks.shape[0] == mask_class_labels.shape[0] M = cfg.MRCNN.RESOLUTION # Target values of -1 are "don't care" / ignore labels mask_targets = -blob_utils.ones( (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True ) for i in range(masks.shape[0]): cls = int(mask_class_labels[i]) start = M**2 * cls end = start + M**2 # Ignore background instance # (only happens when there is no fg samples in an image) if cls > 0: mask_targets[i, start:end] = masks[i, :] return mask_targets ================================================ FILE: detectron/roi_data/minibatch.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Construct minibatches for Detectron networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import logging
import numpy as np

from detectron.core.config import cfg
import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data
import detectron.roi_data.retinanet as retinanet_roi_data
import detectron.roi_data.rpn as rpn_roi_data
import detectron.utils.blob as blob_utils

logger = logging.getLogger(__name__)


def get_minibatch_blob_names(is_training=True):
    """Return blob names in the order in which they are read by the data
    loader.

    'data' always comes first; the remaining names come from exactly one of
    the RPN, RetinaNet or Fast R-CNN roi_data modules, chosen by cfg in that
    order of precedence.
    """
    # data blob: holds a batch of N images, each with 3 channels
    blob_names = ['data']
    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster R-CNN
        blob_names += rpn_roi_data.get_rpn_blob_names(is_training=is_training)
    elif cfg.RETINANET.RETINANET_ON:
        blob_names += retinanet_roi_data.get_retinanet_blob_names(
            is_training=is_training
        )
    else:
        # Fast R-CNN like models trained on precomputed proposals
        blob_names += fast_rcnn_roi_data.get_fast_rcnn_blob_names(
            is_training=is_training
        )
    return blob_names


def get_minibatch(roidb):
    """Given a roidb, construct a minibatch sampled from it.

    Returns (blobs, valid): the blob dictionary and the validity flag
    reported by the model-specific roi_data module.
    """
    # We collect blobs from each image onto a list and then concat them into a
    # single tensor, hence we initialize each blob to an empty list
    blobs = {k: [] for k in get_minibatch_blob_names()}
    # Get the input image blob, formatted for caffe2
    im_blob, im_scales = _get_image_blob(roidb)
    blobs['data'] = im_blob
    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster/Mask R-CNN
        valid = rpn_roi_data.add_rpn_blobs(blobs, im_scales, roidb)
    elif cfg.RETINANET.RETINANET_ON:
        im_width, im_height = im_blob.shape[3], im_blob.shape[2]
        # im_width, im_height corresponds to the network input: padded image
        # (if needed) width and height. We pass it as input and slice the data
        # accordingly so that we don't need to use SampleAsOp
        valid = retinanet_roi_data.add_retinanet_blobs(
            blobs, im_scales, roidb, im_width, im_height
        )
    else:
        # Fast R-CNN like models trained on precomputed proposals
        valid = fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)
    return blobs, valid


def _get_image_blob(roidb):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Each image is read from roidb[i]['image'], mirrored horizontally when
    roidb[i]['flipped'] is set, and resized to a target size drawn at random
    from cfg.TRAIN.SCALES. Returns (blob, im_scales), where im_scales holds
    the scale factor returned by prep_im_for_blob for each image.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images
    )
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(roidb[i]['image'])
        if roidb[i]['flipped']:
            # Reverse the second axis (columns) to mirror the image
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE
        )
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = blob_utils.im_list_to_blob(processed_ims)

    return blob, im_scales

================================================
FILE: detectron/roi_data/retinanet.py
================================================
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def get_retinanet_blob_names(is_training=True):
    """List the RetinaNet blob names in the order the data loader reads them.

    Notation used for the shapes documented below:
        N    = number of images per minibatch
        A    = number of anchors per location = num_scales * num_aspect_ratios
               (for example 9 in the RetinaNet paper)
        H, W = spatial dimensions (different for each FPN level)
        M    = number of positive (foreground) anchors, as decided by the
               positive/negative IoU overlap thresholds; only these are
               regressed by the bounding box branch

    Per FPN level blobs (training only), each suffixed with `_fpn<level>`:
        retnet_cls_labels       -> labels for the cls branch, N x A x H x W
        retnet_roi_bbox_targets -> targets for the bbox branch, M x 4
        retnet_roi_fg_bbox_locs -> where each of the M foreground targets
            lives inside the N x (A * 4) x H x W bbox prediction (shape given
            for the non class-specific case), M x 4. The blob builders in
            this file write each row as [image index, channel offset, y, x].

    `im_info` (height, width, image scale) is always included.
    """
    assert cfg.FPN.FPN_ON, "RetinaNet uses FPN for dense detection"

    # im_info: (height, width, image scale)
    names = ['im_info']
    if not is_training:
        return names

    names.extend(['retnet_fg_num', 'retnet_bg_num'])
    # Same format as RPN blobs, but one set per FPN level
    per_level_prefixes = (
        'retnet_cls_labels_',
        'retnet_roi_bbox_targets_',
        'retnet_roi_fg_bbox_locs_',
    )
    for level in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        level_suffix = 'fpn{}'.format(level)
        names.extend(prefix + level_suffix for prefix in per_level_prefixes)
    return names
** lvl for octave in range(scales_per_octave): octave_scale = 2 ** (octave / float(scales_per_octave)) for idx in range(num_aspect_ratios): anchor_sizes = (stride * octave_scale * anchor_scale, ) anchor_aspect_ratios = (aspect_ratios[idx], ) foa = data_utils.get_field_of_anchors( stride, anchor_sizes, anchor_aspect_ratios, octave, idx) foas.append(foa) all_anchors = np.concatenate([f.field_of_anchors for f in foas]) blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0 for im_i, entry in enumerate(roidb): scale = im_scales[im_i] im_height = np.round(entry['height'] * scale) im_width = np.round(entry['width'] * scale) gt_inds = np.where( (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] assert len(gt_inds) > 0, \ 'Empty ground truth empty for image is not allowed. Please check.' gt_rois = entry['boxes'][gt_inds, :] * scale gt_classes = entry['gt_classes'][gt_inds] im_info = np.array([[im_height, im_width, scale]], dtype=np.float32) blobs['im_info'].append(im_info) retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs( foas, all_anchors, gt_rois, gt_classes, image_width, image_height) for i, foa in enumerate(foas): for k, v in retinanet_blobs[i].items(): # the way it stacks is: # [[anchors for image1] + [anchors for images 2]] level = int(np.log2(foa.stride)) key = '{}_fpn{}'.format(k, level) if k == 'retnet_roi_fg_bbox_locs': v[:, 0] = im_i # loc_stride: 80 * 4 if cls_specific else 4 loc_stride = 4 # 4 coordinate corresponding to bbox prediction if cfg.RETINANET.CLASS_SPECIFIC_BBOX: loc_stride *= (cfg.MODEL.NUM_CLASSES - 1) anchor_ind = foa.octave * num_aspect_ratios + foa.aspect # v[:, 1] is the class label [range 0-80] if we do # class-specfic bbox otherwise it is 0. 
In case of class # specific, based on the label, the location of current # anchor is class_label * 4 and then we take into account # the anchor_ind if the anchors v[:, 1] *= 4 v[:, 1] += loc_stride * anchor_ind blobs[key].append(v) blobs['retnet_fg_num'] += fg_num blobs['retnet_bg_num'] += bg_num blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32) blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32) N = len(roidb) for k, v in blobs.items(): if isinstance(v, list) and len(v) > 0: # compute number of anchors A = int(len(v) / N) # for the cls branch labels [per fpn level], # we have blobs['retnet_cls_labels_fpn{}'] as a list until this step # and length of this list is N x A where # N = num_images, A = num_anchors for example, N = 2, A = 9 # Each element of the list has the shape 1 x 1 x H x W where H, W are # spatial dimension of curret fpn lvl. Let a{i} denote the element # corresponding to anchor i [9 anchors total] in the list. # The elements in the list are in order [[a0, ..., a9], [a0, ..., a9]] # however the network will make predictions like 2 x (9 * 80) x H x W # so we first concatenate the elements of each image to a numpy array # and then concatenate the two images to get the 2 x 9 x H x W if k.find('retnet_cls_labels') >= 0: tmp = [] # concat anchors within an image for i in range(0, len(v), A): tmp.append(np.concatenate(v[i: i + A], axis=1)) # concat images blobs[k] = np.concatenate(tmp, axis=0) else: # for the bbox branch elements [per FPN level], # we have the targets and the fg boxes locations # in the shape: M x 4 where M is the number of fg locations in a # given image at the current FPN level. For the given level, # the bbox predictions will be. 
def _get_retinanet_blobs(
        foas, all_anchors, gt_boxes, gt_classes, im_width, im_height):
    """Compute RetinaNet training targets for one image.

    Args:
        foas: fields of anchors. Each one supplies `field_size` and `stride`
            and owns `field_size * field_size` consecutive rows of
            `all_anchors`, in the same order as `foas`.
        all_anchors: anchors of every field concatenated along axis 0.
        gt_boxes: ground-truth boxes to match the anchors against.
        gt_classes: class label of each row of `gt_boxes`.
        im_width, im_height: size used to crop each label map to
            `int(im_height / stride)` x `int(im_width / stride)`.

    Returns:
        A tuple `(blobs_out, out_num_fg, out_num_bg)` where `blobs_out` holds
        one dict per field of anchors with the keys `retnet_cls_labels`,
        `retnet_roi_bbox_targets` and `retnet_roi_fg_bbox_locs`, and the two
        counts are one-element float arrays used to normalize the losses.
    """
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    # Every anchor is kept (no filtering by image boundary here)
    inds_inside = np.arange(total_anchors)
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label >= 1 is the positive class, 0 is negative, -1 is don't care
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)

    # Defaults for the "no ground truth" case, so that the code after the
    # matching step does not hit unbound names: no anchor overlaps anything.
    anchor_to_gt_argmax = np.zeros((num_inside, ), dtype=np.int64)
    anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)

    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[
            np.arange(num_inside), anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if len(fg_inds) > 0:
        # Regression targets only exist for foreground anchors
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0)

    # Number of class slots per anchor in the class-specific bbox output
    num_classes = cfg.MODEL.NUM_CLASSES - 1

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, 1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # Data for the select_smooth_l1 loss: one row per foreground location.
        # np.where returns one index array per axis, so the number of
        # foreground locations is the length of any one of those arrays.
        im_inds, _, y, x = np.where(_labels > 0)
        num_fg_locs = len(im_inds)
        if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            # Zero-based class index selects the class slot
            cls_slots = _labels[im_inds, 0, y, x] - 1
        else:
            cls_slots = np.zeros((num_fg_locs, ), dtype=_labels.dtype)
        assert np.all((cls_slots >= 0) & (cls_slots < num_classes)), \
            'label out of the range'
        # Index arrays separated by a slice put the indexed axis first, so
        # this yields one (dx, dy, dw, dh)-style row of 4 values per location.
        _roi_bbox_targets = _bbox_targets[im_inds, :, y, x]
        # Rows are [image index placeholder, class slot, y, x]
        _roi_fg_bbox_locs = np.column_stack(
            [np.zeros((num_fg_locs, )), cls_slots, y, x])

        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))

    # The +1.0 keeps both normalizers strictly positive
    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (
        np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
        out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
'rpn_bbox_inside_weights_wide_fpn' + str(lvl), 'rpn_bbox_outside_weights_wide_fpn' + str(lvl) ] else: # Single level RPN blobs blob_names += [ 'rpn_labels_int32_wide', 'rpn_bbox_targets_wide', 'rpn_bbox_inside_weights_wide', 'rpn_bbox_outside_weights_wide' ] return blob_names def add_rpn_blobs(blobs, im_scales, roidb): """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.""" if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN: # RPN applied to many feature levels, as in the FPN paper k_max = cfg.FPN.RPN_MAX_LEVEL k_min = cfg.FPN.RPN_MIN_LEVEL foas = [] for lvl in range(k_min, k_max + 1): field_stride = 2.**lvl anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ) anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS foa = data_utils.get_field_of_anchors( field_stride, anchor_sizes, anchor_aspect_ratios ) foas.append(foa) all_anchors = np.concatenate([f.field_of_anchors for f in foas]) else: foa = data_utils.get_field_of_anchors( cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS ) all_anchors = foa.field_of_anchors for im_i, entry in enumerate(roidb): scale = im_scales[im_i] im_height = np.round(entry['height'] * scale) im_width = np.round(entry['width'] * scale) gt_inds = np.where( (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0) )[0] gt_rois = entry['boxes'][gt_inds, :] * scale im_info = np.array([[im_height, im_width, scale]], dtype=np.float32) blobs['im_info'].append(im_info) # Add RPN targets if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN: # RPN applied to many feature levels, as in the FPN paper rpn_blobs = _get_rpn_blobs( im_height, im_width, foas, all_anchors, gt_rois ) for i, lvl in enumerate(range(k_min, k_max + 1)): for k, v in rpn_blobs[i].items(): blobs[k + '_fpn' + str(lvl)].append(v) else: # Classical RPN, applied to a single feature level rpn_blobs = _get_rpn_blobs( im_height, im_width, [foa], all_anchors, gt_rois ) for k, v in rpn_blobs.items(): blobs[k].append(v) for k, v in blobs.items(): if isinstance(v, list) and 
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    """Compute RPN labels, bbox targets and loss weights for one image.

    Args:
        im_height, im_width: image size used for the straddle filter.
        foas: fields of anchors. Each one supplies `field_size` and
            `num_cell_anchors` and owns a consecutive run of `all_anchors`.
        all_anchors: anchors of every field concatenated along axis 0.
        gt_boxes: ground-truth boxes to match against; may be empty, in which
            case no anchor is labeled foreground.

    Returns:
        A dict with the keys `rpn_labels_int32_wide`, `rpn_bbox_targets_wide`,
        `rpn_bbox_inside_weights_wide` and `rpn_bbox_outside_weights_wide`
        when there is exactly one field of anchors, otherwise a list with one
        such dict per field.
    """
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh)
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)

    # Defaults for an image without usable gt boxes: no anchor overlaps
    # anything, so every anchor is a background candidate. Without these the
    # code below would fail on unbound names.
    anchor_to_gt_argmax = np.zeros((num_inside, ), dtype=np.int64)
    anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)

    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])
        ]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max
        )[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IOU
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False
        )
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # subsample negative labels if we have too many
    # (samples with replacement, but since the set of bg inds is large most
    # samples will not have repeats)
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
    else:
        enable_inds = bg_inds
    labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if len(fg_inds) > 0:
        # Regression targets only exist for foreground anchors
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]
        )

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is only trained on positive examples so we set their
    # weights to 1.0 (or otherwise if config is different) and 0 otherwise
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0
    )
    bbox_inside_weights = data_utils.unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0
    )
    bbox_outside_weights = data_utils.unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0
    )

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(
                rpn_labels_int32_wide=_labels,
                rpn_bbox_targets_wide=_bbox_targets,
                rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                rpn_bbox_outside_weights_wide=_bbox_outside_weights
            )
        )
    # Single-level RPN callers expect a bare dict rather than a list
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
def parse_args():
    """Parse the benchmark command line.

    Prints the help text and exits with status 1 when the script is invoked
    without any argument. Everything after the recognized options is
    collected into `opts` (config overrides, see detectron/core/config.py).
    """
    parser = argparse.ArgumentParser()
    # (flags, keyword arguments) in the order they appear in the help output
    argument_specs = (
        (('--num-batches', ), dict(
            dest='num_batches',
            help='Number of minibatches to run',
            default=200,
            type=int)),
        (('--sleep', ), dict(
            dest='sleep_time',
            help='Seconds sleep to emulate a network running',
            default=0.1,
            type=float)),
        (('--cfg', ), dict(
            dest='cfg_file',
            help='optional config file',
            default=None,
            type=str)),
        (('--x-factor', ), dict(
            dest='x_factor',
            help='simulates x-factor more GPUs',
            default=1,
            type=int)),
        (('--profiler', ), dict(
            dest='profiler',
            help='profile minibatch load time',
            action='store_true')),
        (('opts', ), dict(
            help='See detectron/core/config.py for all options',
            default=None,
            nargs=argparse.REMAINDER)),
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


def loader_loop(roi_data_loader):
    """Time 100 direct `get_next_minibatch()` calls and print the average."""
    load_timer = Timer()
    iters = 100
    for done in range(1, iters + 1):
        load_timer.tic()
        roi_data_loader.get_next_minibatch()
        load_timer.toc()
        print('{:d}/{:d}: Average get_next_minibatch time: {:.3f}s'.format(
            done, iters, load_timer.average_time))


def main(opts):
    """Benchmark minibatch loading and blob dequeueing.

    First times raw minibatch construction (optionally under cProfile), then
    starts the loader threads and times `opts.num_batches` runs of a net that
    dequeues the loader's blobs for every GPU, sleeping `opts.sleep_time`
    seconds between runs to emulate a network consuming the data.
    """
    logger = logging.getLogger(__name__)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    logger.info('{:d} roidb entries'.format(len(roidb)))
    # NOTE: the name `roi_data_loader` is referenced by the profiler statement
    # string below (looked up through locals()), so it must not be renamed.
    roi_data_loader = RoIDataLoader(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )
    blob_names = roi_data_loader.get_output_names()

    net = core.Net('dequeue_net')
    # NOTE(review): elsewhere in this code base the execution type is set on
    # the proto (`net.Proto().type = ...`); confirm whether assigning
    # `net.type` has any effect on how the net is executed.
    net.type = 'dag'
    all_blobs = []
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for blob_name in blob_names:
                    blob = core.ScopedName(blob_name)
                    all_blobs.append(blob)
                    workspace.CreateBlob(blob)
                    logger.info('Creating blob: {}'.format(blob))
                # One dequeue op per GPU, filling that GPU's scoped blobs
                net.DequeueBlobs(
                    roi_data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))

    if opts.profiler:
        import cProfile
        cProfile.runctx(
            'loader_loop(roi_data_loader)', globals(), locals(),
            sort='cumulative')
    else:
        loader_loop(roi_data_loader)

    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)

    total_time = 0
    for i in range(opts.num_batches):
        start_t = time.time()
        for _ in range(opts.x_factor):
            workspace.RunNetOnce(net)
        # Average over the simulated GPU multiplier
        total_time += (time.time() - start_t) / opts.x_factor
        logger.info(
            '{:d}/{:d}: Averge dequeue time: {:.3f}s [{:d}/{:d}]'.format(
                i + 1, opts.num_batches, total_time / (i + 1),
                roi_data_loader._minibatch_queue.qsize(),
                cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE
            )
        )
        # Sleep to simulate the time taken by running a little network
        time.sleep(opts.sleep_time)
        # To inspect:
        # blobs = workspace.FetchBlobs(all_blobs)
        # from IPython import embed; embed()
    logger.info('Shutting down data loader...')
    roi_data_loader.shutdown()


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    logger = setup_logging(__name__)
    logger.setLevel(logging.DEBUG)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    # Seed only after the config file and command-line overrides have been
    # merged, so that a user-supplied RNG_SEED is the one that takes effect.
    np.random.seed(cfg.RNG_SEED)
    logger.info('Running with config:')
    logger.info(pprint.pformat(cfg))
    main(args)
class BatchPermutationOpTest(unittest.TestCase):
    """Tests for the custom `BatchPermutation` operator, run on CUDA device 0."""

    def _run_op_test(self, X, I, check_grad=False):
        """Run the op on (X, I), compare against `X[I]`, optionally check grads."""
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y'])
            for blob_name, value in (('X', X), ('I', I)):
                workspace.FeedBlob(blob_name, value)
        workspace.RunOperatorOnce(op)
        actual = workspace.FetchBlob('Y')

        if check_grad:
            checker = gradient_checker.GradientChecker(
                stepsize=0.1,
                threshold=0.001,
                device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
            )
            passed, _grad, _grad_estimated = checker.CheckSimple(
                op, [X, I], 0, [0]
            )
            self.assertTrue(passed, 'Grad check failed')

        # Reference result: plain numpy indexing along the first axis
        np.testing.assert_allclose(actual, X[I], rtol=1e-5, atol=1e-08)

    def _run_speed_test(self, iters=5, N=1024):
        """This function provides an example of how to benchmark custom
        operators using the Caffe2 'prof_dag' network execution type. Please
        note that for 'prof_dag' to work, Caffe2 must be compiled with
        profiling support using the `-DUSE_PROF=ON` option passed to `cmake`
        when building Caffe2.
        """
        net = core.Net('test')
        proto = net.Proto()
        proto.type = 'prof_dag'
        proto.num_workers = 2
        permuted = net.BatchPermutation(['X', 'I'], 'Y')
        flat = net.FlattenToVec([permuted], 'Y_flat')
        loss = net.AveragedLoss([flat], 'loss')
        net.AddGradientOperators([loss])
        workspace.CreateNet(net)

        X = np.random.randn(N, 256, 14, 14)
        for _ in range(iters):
            I = np.random.permutation(N)
            workspace.FeedBlob('X', X.astype(np.float32))
            workspace.FeedBlob('I', I.astype(np.int32))
            workspace.RunNet(net.Proto().name)
            np.testing.assert_allclose(
                workspace.FetchBlob('Y'), X[I], rtol=1e-5, atol=1e-08
            )

    def test_forward_and_gradient(self):
        """Identity, swap and random permutations, each with a gradient check."""
        # (batch size, factory for the index blob); the index is created after
        # the data so the random draws happen in the same order as before.
        cases = (
            (2, lambda: np.array([0, 1], dtype=np.int32)),
            (2, lambda: np.array([1, 0], dtype=np.int32)),
            (10, lambda: np.array(np.random.permutation(10), dtype=np.int32)),
        )
        for batch_size, make_index in cases:
            A = np.random.randn(batch_size, 3, 5, 7).astype(np.float32)
            I = make_index()
            self._run_op_test(A, I, check_grad=True)

    def test_size_exceptions(self):
        """An index blob longer than the batch must make the op fail."""
        A = np.random.randn(2, 256, 42, 86).astype(np.float32)
        I = np.array(np.random.permutation(10), dtype=np.int32)
        with self.assertRaises(RuntimeError):
            self._run_op_test(A, I)

    # See doc string in _run_speed_test
    # def test_perf(self):
    #     with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
    #         self._run_speed_test()


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    c2_utils.import_detectron_ops()
    assert 'BatchPermutation' in workspace.RegisteredOperators()
    logging_utils.setup_logging(__name__)
    unittest.main()
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import numpy as np import unittest from pycocotools import mask as COCOmask import detectron.utils.boxes as box_utils def random_boxes(mean_box, stdev, N): boxes = np.random.randn(N, 4) * stdev + mean_box return boxes.astype(dtype=np.float32) class TestBboxTransform(unittest.TestCase): def test_bbox_transform_and_inverse(self): weights = (5, 5, 10, 10) src_boxes = random_boxes([10, 10, 20, 20], 1, 10) dst_boxes = random_boxes([10, 10, 20, 20], 1, 10) deltas = box_utils.bbox_transform_inv( src_boxes, dst_boxes, weights=weights ) dst_boxes_reconstructed = box_utils.bbox_transform( src_boxes, deltas, weights=weights ) np.testing.assert_array_almost_equal( dst_boxes, dst_boxes_reconstructed, decimal=5 ) def test_bbox_dataset_to_prediction_roundtrip(self): """Simulate the process of reading a ground-truth box from a dataset, make predictions from proposals, convert the predictions back to the dataset format, and then use the COCO API to compute IoU overlap between the gt box and the predictions. These should have IoU of 1. 
""" weights = (5, 5, 10, 10) # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format gt_xywh_box = [10, 20, 100, 150] # 2/ convert it to our internal (x1, y1, x2, y2) format gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box) # 3/ consider nearby proposal boxes prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10) # 4/ compute proposal-to-gt transformation deltas deltas = box_utils.bbox_transform_inv( prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights ) # 5/ use deltas to transform proposals to xyxy predicted box pred_xyxy_boxes = box_utils.bbox_transform( prop_xyxy_boxes, deltas, weights=weights ) # 6/ convert xyxy predicted box to xywh predicted box pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes) # 7/ use COCO API to compute IoU not_crowd = [int(False)] * pred_xywh_boxes.shape[0] ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd) np.testing.assert_array_almost_equal(ious, np.ones(ious.shape)) def test_cython_bbox_iou_against_coco_api_bbox_iou(self): """Check that our cython implementation of bounding box IoU overlap matches the COCO API implementation. 
""" def _do_test(b1, b2): # Compute IoU overlap with the cython implementation cython_iou = box_utils.bbox_overlaps(b1, b2) # Compute IoU overlap with the COCO API implementation # (requires converting boxes from xyxy to xywh format) xywh_b1 = box_utils.xyxy_to_xywh(b1) xywh_b2 = box_utils.xyxy_to_xywh(b2) not_crowd = [int(False)] * b2.shape[0] coco_ious = COCOmask.iou(xywh_b1, xywh_b2, not_crowd) # IoUs should be similar np.testing.assert_array_almost_equal( cython_iou, coco_ious, decimal=5 ) # Test small boxes b1 = random_boxes([10, 10, 20, 20], 5, 10) b2 = random_boxes([10, 10, 20, 20], 5, 10) _do_test(b1, b2) # Test bigger boxes b1 = random_boxes([10, 10, 110, 20], 20, 10) b2 = random_boxes([10, 10, 110, 20], 20, 10) _do_test(b1, b2) if __name__ == '__main__': unittest.main() ================================================ FILE: detectron/tests/test_cfg.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
class TestAttrDict(unittest.TestCase):
    """Checks the mutable/immutable switch exposed by AttrDict."""

    def test_immutability(self):
        """Exercise immutable(True/False) at the top level, recursively on
        nested AttrDicts, and across a yaml dump / load_cfg round trip.
        """
        # Top level immutable
        a = AttrDict()
        a.foo = 0
        a.immutable(True)
        # One assertRaises context per assignment: with both assignments in a
        # single block the first one raises and the second is never executed,
        # so it would not be checked at all.
        with self.assertRaises(AttributeError):
            a.foo = 1
        with self.assertRaises(AttributeError):
            a.bar = 1
        self.assertTrue(a.is_immutable())
        self.assertEqual(a.foo, 0)
        a.immutable(False)
        self.assertFalse(a.is_immutable())
        a.foo = 1
        self.assertEqual(a.foo, 1)

        # Recursively immutable
        a.level1 = AttrDict()
        a.level1.foo = 0
        a.level1.level2 = AttrDict()
        a.level1.level2.foo = 0
        a.immutable(True)
        self.assertTrue(a.is_immutable())
        with self.assertRaises(AttributeError):
            a.level1.level2.foo = 1
        with self.assertRaises(AttributeError):
            a.level1.bar = 1
        self.assertEqual(a.level1.level2.foo, 0)

        # Serialize immutability state
        a.immutable(True)
        a2 = core_config.load_cfg(envu.yaml_dump(a))
        self.assertTrue(a.is_immutable())
        self.assertTrue(a2.is_immutable())
def test_merge_cfg_from_list(self):
    """Merge a flat [key, value, key, value, ...] list into the global cfg.

    Checks that the overrides were not already in effect, applies them, and
    then verifies each value, including that the string '(100, )' ends up
    as a one-element tuple.
    """
    overrides = (
        ('TRAIN.SCALES', '(100, )'),
        ('MODEL.TYPE', u'foobar'),
        ('NUM_GPUS', 2),
    )
    # Flatten the pairs into the alternating key/value list the API takes
    opts = [item for pair in overrides for item in pair]

    # Preconditions: none of the overrides is already set
    scales_before = cfg.TRAIN.SCALES
    assert len(scales_before) > 0
    assert scales_before[0] != 100
    assert cfg.MODEL.TYPE != 'foobar'
    assert cfg.NUM_GPUS != 2

    core_config.merge_cfg_from_list(opts)

    # Postconditions: every override is visible on the global cfg
    scales_after = cfg.TRAIN.SCALES
    assert type(scales_after) is tuple
    assert len(scales_after) == 1
    assert scales_after[0] == 100
    assert cfg.MODEL.TYPE == 'foobar'
    assert cfg.NUM_GPUS == 2
def test_renamed_key_from_file(self):
    """Merging a yaml file that still uses a renamed key must raise KeyError.

    You should see logger messages like:
      "Key EXAMPLE.RENAMED.KEY was renamed to EXAMPLE.KEY;
      please update your config"
    """
    # The old key must not exist on the global cfg to begin with
    with self.assertRaises(AttributeError):
        cfg.EXAMPLE.RENAMED.KEY  # noqa

    # Build the EXAMPLE.RENAMED.KEY subtree bottom-up, then attach it to a
    # private copy of the config so the global cfg is left untouched.
    renamed = AttrDict()
    renamed.KEY = 'foobar'
    example = AttrDict()
    example.RENAMED = renamed
    stale_cfg = copy.deepcopy(cfg)
    stale_cfg.EXAMPLE = example

    with tempfile.NamedTemporaryFile() as yaml_file:
        envu.yaml_dump(stale_cfg, yaml_file)
        with self.assertRaises(KeyError):
            core_config.merge_cfg_from_file(yaml_file.name)
def create_loader_and_network(sample_data, name):
    """Start an RoIDataLoader over synthetic data and build its dequeue net.

    Arguments:
        sample_data (ndarray): array used as the 'data' entry of every
            synthetic roidb entry
        name (str): name given to the dequeue net

    Returns:
        loader: the started RoIDataLoader (caller must call shutdown())
        net: the net that dequeues the loader's blobs
    """
    roidb = get_roidb_sample_data(sample_data)
    loader = RoIDataLoader(roidb)
    # Honor the caller-supplied name so that the train and test nets can be
    # told apart (it used to be ignored in favor of a hard-coded train name).
    net = get_net(loader, name)
    loader.register_sigint_handler()
    loader.start(prefill=False)
    return loader, net


def run_net(net):
    """Run the dequeue net once and return the 'data' blob scoped to gpu_0."""
    workspace.RunNetOnce(net)
    gpu_dev = core.DeviceOption(caffe2_pb2.CUDA, 0)
    name_scope = 'gpu_{}'.format(0)
    with core.NameScope(name_scope):
        with core.DeviceScope(gpu_dev):
            # ScopedName resolves 'data' against the active name scope
            return workspace.FetchBlob(core.ScopedName('data'))


class TestRoIDataLoader(unittest.TestCase):
    """Runs two RoIDataLoaders side by side and checks they do not mix data."""

    # Decorators apply bottom-up: _1 is the get_minibatch mock and _2 is the
    # get_minibatch_blob_names mock. Neither is inspected by the test.
    @mock.patch(
        'detectron.roi_data.loader.get_minibatch_blob_names',
        return_value=[u'data']
    )
    @mock.patch(
        'detectron.roi_data.loader.get_minibatch',
        side_effect=get_roidb_blobs
    )
    def test_two_parallel_loaders(self, _1, _2):
        """Each dequeue net must keep returning its own loader's sample."""
        train_data = np.random.rand(2, 3, 3).astype(np.float32)
        train_loader, train_net = create_loader_and_network(
            train_data, 'dequeue_net_train'
        )
        try:
            test_data = np.random.rand(2, 4, 4).astype(np.float32)
            test_loader, test_net = create_loader_and_network(
                test_data, 'dequeue_net_test'
            )
            try:
                for _ in range(5):
                    data = run_net(train_net)
                    self.assertEqual(data[0].tolist(), train_data.tolist())
                    data = run_net(test_net)
                    self.assertEqual(data[0].tolist(), test_data.tolist())
            finally:
                # Shut the loaders down even when an assertion fails so a
                # failing test does not leave started loaders behind.
                test_loader.shutdown()
        finally:
            train_loader.shutdown()
def test_restore_checkpoint():
    """Save a model's weights, re-initialize the model, reload the saved
    weights, and check that every parameter and momentum blob matches the
    values captured right after saving.

    Reads the model and dataset settings from the global cfg, writes one
    checkpoint file below the training output directory, and removes
    cfg.OUTPUT_DIR before the final comparisons run.
    """
    # Create Model
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    # Momentum blobs get a Gaussian fill so they hold non-trivial values
    add_momentum_init_ops(model)
    init_weights(model)
    # Fill input blobs
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)
    # Bookkeeping for checkpoint creation
    iter_num = 0
    checkpoints = {}
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(output_dir, 'model_iter{}.pkl'.format(iter_num))
    checkpoints[iter_num] = chk_file_path
    # Save model weights
    nu.save_model_to_weights_file(checkpoints[iter_num], model)
    # Snapshot taken after saving: gpu_0 blobs keyed by unscoped name, and
    # all blobs keyed by scoped name
    orig_gpu_0_params, orig_all_params = get_params(model)
    # Change the model weights
    # (re-runs param_init_net, so the workspace no longer holds the saved
    # values before the reload below)
    init_weights(model)
    # Reload the weights in the model
    # Only gpu_0 is loaded from the file; broadcast_parameters is then called
    # to propagate the values to the other GPUs
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    # The checkpoint is no longer needed once it has been loaded
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all_params = get_params(model)
    # Check if all params are loaded correctly
    for scoped_name, blob in orig_all_params.items():
        np.testing.assert_array_equal(blob, restored_all_params[scoped_name])
    # Check if broadcast_parameters works
    # (every restored blob must equal the original gpu_0 blob that shares its
    # unscoped name)
    for scoped_name, blob in restored_all_params.items():
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        np.testing.assert_array_equal(blob, orig_gpu_0_params[unscoped_name])
class SmoothL1LossTest(unittest.TestCase):
    """Gradient check for the SmoothL1Loss operator on CUDA device 0."""

    def test_forward_and_gradient(self):
        """Compare the operator's gradient with a numerical estimate for
        random inputs, non-negative weights, and random scale and beta.
        """
        shape = (128, 4 * 21)

        def sample(clamp_negative=False):
            # float32 Gaussian sample; optionally zero out negative entries
            values = np.random.randn(*shape).astype(np.float32)
            if clamp_negative:
                values[values < 0] = 0
            return values

        # The draw order is kept fixed so a seeded run sees the same inputs
        targets = sample()
        predictions = sample()
        inside_weights = sample(clamp_negative=True)
        outside_weights = sample(clamp_negative=True)
        scale = np.random.random()
        beta = np.random.random()

        input_names = ['Y_hat', 'Y', 'inside_weights', 'outside_weights']
        input_values = [predictions, targets, inside_weights, outside_weights]
        loss_op = core.CreateOperator(
            'SmoothL1Loss', input_names, ['loss'], scale=scale, beta=beta
        )

        checker = gradient_checker.GradientChecker(
            stepsize=0.005,
            threshold=0.005,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        # Gradient is checked w.r.t. input 0 (Y_hat) through output 0 (loss)
        passed, grad, grad_estimated = checker.CheckSimple(
            loss_op, input_values, 0, [0]
        )

        shapes_match = grad.shape == grad_estimated.shape
        self.assertTrue(
            shapes_match, 'Fail check: grad.shape != grad_estimated.shape'
        )

        # To inspect the gradient and estimated gradient:
        # np.set_printoptions(precision=3, suppress=True)
        # print('grad:')
        # print(grad)
        # print('grad_estimated:')
        # print(grad_estimated)

        self.assertTrue(passed)
class SpatialNarrowAsOpTest(unittest.TestCase):
    """Forward, gradient and error-path checks for the SpatialNarrowAs op."""

    def _run_test(self, A, B, check_grad=False):
        """Run SpatialNarrowAs on (A, B) and compare the output with A
        sliced down to the output's own shape.

        Arguments:
            A (ndarray): blob fed as input 'A'
            B (ndarray): blob fed as input 'B'
            check_grad (bool): also run a numerical gradient check on input 0
        """
        # NOTE(review): the source listing lost its indentation; the inputs
        # are assumed to be fed inside the CUDA device scope - confirm.
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            narrow_op = core.CreateOperator(
                'SpatialNarrowAs', ['A', 'B'], ['C']
            )
            for blob_name, value in (('A', A), ('B', B)):
                workspace.FeedBlob(blob_name, value)
        workspace.RunOperatorOnce(narrow_op)
        C = workspace.FetchBlob('C')

        if check_grad:
            checker = gradient_checker.GradientChecker(
                stepsize=0.005,
                threshold=0.005,
                device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
            )
            passed, _, _ = checker.CheckSimple(narrow_op, [A, B], 0, [0])
            self.assertTrue(passed, 'Grad check failed')

        # Reference result: the leading corner of A with the output's extent
        dims = C.shape
        expected = A[:dims[0], :dims[1], :dims[2], :dims[3]]
        np.testing.assert_allclose(C, expected, rtol=1e-5, atol=1e-08)

    def test_small_forward_and_gradient(self):
        """Small inputs, with the gradient check enabled."""
        for b_shape in ((2, 3, 2, 2), (2, 3, 5)):
            A = np.random.randn(2, 3, 5, 7).astype(np.float32)
            B = np.random.randn(*b_shape).astype(np.float32)
            self._run_test(A, B, check_grad=True)

    def test_large_forward(self):
        """Larger inputs, forward pass only."""
        for a_shape in ((2, 256, 42, 100), (2, 256, 42, 87)):
            A = np.random.randn(*a_shape).astype(np.float32)
            B = np.random.randn(2, 256, 35, 87).astype(np.float32)
            self._run_test(A, B)

    def test_size_exceptions(self):
        """Shape combinations for which the op is expected to raise."""
        for a_shape in ((2, 256, 42, 86), (2, 255, 42, 88)):
            A = np.random.randn(*a_shape).astype(np.float32)
            B = np.random.randn(2, 256, 35, 87).astype(np.float32)
            with self.assertRaises(RuntimeError):
                self._run_test(A, B)
class ZeroEvenOpTest(unittest.TestCase):
    """CPU and GPU checks for the custom ZeroEven operator.

    The expectations encode the operator's contract: values at even indices
    of a 1D input become zero, values at odd indices are left unchanged.
    """

    def _run_zero_even_op(self, X):
        """Run ZeroEven on X without a device scope and return output 'Y'."""
        op = core.CreateOperator('ZeroEven', ['X'], ['Y'])
        workspace.FeedBlob('X', X)
        workspace.RunOperatorOnce(op)
        Y = workspace.FetchBlob('Y')
        return Y

    def _run_zero_even_op_gpu(self, X):
        """Run ZeroEven on X under a CUDA device-0 scope and return 'Y'."""
        # NOTE(review): the source listing lost its indentation; the input
        # is assumed to be fed inside the device scope - confirm.
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            op = core.CreateOperator('ZeroEven', ['X'], ['Y'])
            workspace.FeedBlob('X', X)
        workspace.RunOperatorOnce(op)
        Y = workspace.FetchBlob('Y')
        return Y

    def test_throws_on_non_1D_arrays(self):
        X = np.zeros((2, 2), dtype=np.float32)
        # Raw string: '\.' and '\(' are regex escapes, not string escapes
        with self.assertRaisesRegex(RuntimeError, r'X\.ndim\(\) == 1'):
            self._run_zero_even_op(X)

    def test_handles_empty_arrays(self):
        X = np.array([], dtype=np.float32)
        Y_exp = np.copy(X)
        Y_act = self._run_zero_even_op(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_sets_vals_at_even_inds_to_zero(self):
        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)
        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)
        Y_act = self._run_zero_even_op(X)
        np.testing.assert_allclose(Y_act[0::2], Y_exp[0::2])

    def test_preserves_vals_at_odd_inds(self):
        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)
        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)
        Y_act = self._run_zero_even_op(X)
        np.testing.assert_allclose(Y_act[1::2], Y_exp[1::2])

    def test_handles_even_length_arrays(self):
        X = np.random.rand(64).astype(np.float32)
        Y_exp = np.copy(X)
        Y_exp[0::2] = 0.0
        Y_act = self._run_zero_even_op(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_handles_odd_length_arrays(self):
        X = np.random.randn(77).astype(np.float32)
        Y_exp = np.copy(X)
        Y_exp[0::2] = 0.0
        Y_act = self._run_zero_even_op(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_gpu_throws_on_non_1D_arrays(self):
        X = np.zeros((2, 2), dtype=np.float32)
        # Raw string: '\.' and '\(' are regex escapes, not string escapes
        with self.assertRaisesRegex(RuntimeError, r'X\.ndim\(\) == 1'):
            self._run_zero_even_op_gpu(X)

    def test_gpu_handles_empty_arrays(self):
        X = np.array([], dtype=np.float32)
        Y_exp = np.copy(X)
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_gpu_sets_vals_at_even_inds_to_zero(self):
        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)
        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act[0::2], Y_exp[0::2])

    def test_gpu_preserves_vals_at_odd_inds(self):
        X = np.array([0, 1, 2, 3, 4], dtype=np.float32)
        Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32)
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act[1::2], Y_exp[1::2])

    def test_gpu_handles_even_length_arrays(self):
        X = np.random.rand(64).astype(np.float32)
        Y_exp = np.copy(X)
        Y_exp[0::2] = 0.0
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act, Y_exp)

    def test_gpu_handles_odd_length_arrays(self):
        X = np.random.randn(77).astype(np.float32)
        Y_exp = np.copy(X)
        Y_exp[0::2] = 0.0
        Y_act = self._run_zero_even_op_gpu(X)
        np.testing.assert_allclose(Y_act, Y_exp)
def get_image_blob(im, target_scale, target_max_size):
    """Convert an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order
        target_scale: target size for the shorter image side
        target_max_size: upper bound for the longer image side after scaling

    Returns:
        blob (ndarray): float32 data blob of shape (1, 3, height, width)
        im_scale (float): image scale (target size) / (original size)
        im_info (ndarray): float32 array of shape (1, 3) holding
            (blob height, blob width, im_scale)
    """
    processed_im, im_scale = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, target_scale, target_max_size
    )
    blob = im_list_to_blob(processed_im)
    # NOTE: this height and width may be larger than actual scaled input image
    # due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are
    # maintaining this behavior for now to make existing results exactly
    # reproducible (in practice using the true input image height and width
    # yields nearly the same results, but they are sometimes slightly different
    # because predictions near the edge of the image will be pruned more
    # aggressively).
    height, width = blob.shape[2], blob.shape[3]
    im_info = np.hstack((height, width, im_scale))[np.newaxis, :]
    return blob, im_scale, im_info.astype(np.float32)


def im_list_to_blob(ims):
    """Convert a list of images into a network input. Assumes images were
    prepared using prep_im_for_blob or equivalent: i.e.
      - BGR channel order
      - pixel means subtracted
      - resized to the desired input size
      - float32 numpy ndarray format

    A single image may be passed without wrapping it in a list. Images smaller
    than the largest one are zero-padded at the bottom and right. When
    cfg.FPN.FPN_ON is set, height and width are additionally rounded up to a
    multiple of cfg.FPN.COARSEST_STRIDE.

    Output is a 4D NCHW float32 tensor of the images stacked along axis 0,
    i.e. shape (num_images, 3, height, width).
    """
    if not isinstance(ims, list):
        ims = [ims]
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    # Pad the image so they can be divisible by a stride
    if cfg.FPN.FPN_ON:
        stride = float(cfg.FPN.COARSEST_STRIDE)
        max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
        max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)

    blob = np.zeros(
        (len(ims), max_shape[0], max_shape[1], 3), dtype=np.float32
    )
    for i, im in enumerate(ims):
        # Each image occupies the top-left corner; the remainder stays zero
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    return blob.transpose(channel_swap)


def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Prepare an image for use as a network input blob. Specially:
      - Subtract per-channel pixel mean
      - Convert to float32
      - Rescale so the shorter side equals target_size, unless that would
        make the longer side exceed max_size, in which case the longer side
        is scaled to max_size instead

    Returns the transformed image and the scale factor that was applied.

    NOTE: if `im` is already float32 the conversion does not copy, so the mean
    subtraction then modifies the caller's array in place.
    """
    im = im.astype(np.float32, copy=False)
    im -= pixel_means
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(
        im,
        None,
        None,
        fx=im_scale,
        fy=im_scale,
        interpolation=cv2.INTER_LINEAR
    )
    return im, im_scale


def zeros(shape, int32=False):
    """Return a blob of all zeros of the given shape with the correct float or
    int data type (int32 when `int32` is true, float32 otherwise).
    """
    return np.zeros(shape, dtype=np.int32 if int32 else np.float32)


def ones(shape, int32=False):
    """Return a blob of all ones of the given shape with the correct float or
    int data type (int32 when `int32` is true, float32 otherwise).
    """
    return np.ones(shape, dtype=np.int32 if int32 else np.float32)


def py_op_copy_blob(blob_in, blob_out):
    """Copy a numpy ndarray given as blob_in into the Caffe2 CPUTensor blob
    given as blob_out. Supports float32 and int32 blob data types. This function
    is intended for copying numpy data into a Caffe2 blob in PythonOps.

    Arguments:
        blob_in (ndarray): source data
        blob_out: destination blob; must provide init(), reshape() and .data
    """
    # int32 data needs an explicit typed init of the output blob; every other
    # dtype goes through a plain reshape.
    # This decision used to sit behind a try/except that probed an undefined
    # name, so the except branch always ran. The dtype test below is exactly
    # what that branch computed, which keeps runtime behavior unchanged.
    if blob_in.dtype == np.int32:
        # init can only take a list (failed on tuple)
        blob_out.init(list(blob_in.shape), caffe2_pb2.TensorProto.INT32)
    else:
        blob_out.reshape(blob_in.shape)
    blob_out.data[...] = blob_in


def get_loss_gradients(model, loss_blobs):
    """Generate a gradient of 1 for each loss specified in 'loss_blobs'.

    Returns a dict mapping str(loss blob) to str(gradient blob), where each
    gradient blob is named '<loss>_grad' and is produced by a ConstantFill op
    added to model.net.
    """
    loss_gradients = {}
    for b in loss_blobs:
        loss_grad = model.net.ConstantFill(b, [b + '_grad'], value=1.0)
        loss_gradients[str(b)] = str(loss_grad)
    return loss_gradients


def serialize(obj):
    """Serialize a Python object using pickle and encode it as an array of
    float32 values so that it can be fed into the workspace.

    Each pickle byte becomes one float32 element. See deserialize().
    """
    # frombuffer replaces the deprecated binary mode of np.fromstring; the
    # astype call returns a fresh, writable float32 copy.
    return np.frombuffer(pickle.dumps(obj), dtype=np.uint8).astype(np.float32)


def deserialize(arr):
    """Unserialize a Python object from an array of float32 values fetched from
    a workspace. See serialize().
    """
    # NOTE: pickle.loads can execute arbitrary code; only pass arrays that
    # were produced by serialize() from trusted data.
    return pickle.loads(arr.astype(np.uint8).tobytes())
A box with x2 = x1 and y2 = y1 was taken to include a single pixel, having a width of 1, and hence requiring the "+ 1". Now, most datasets will likely provide boxes with floating point coordinates and the width should be more reasonably computed as x2 - x1. In practice, as long as a model is trained and tested with a consistent convention either decision seems to be ok (at least in our experience on COCO). Since we have a long history of training models with the "+ 1" convention, we are reluctant to change it even if our modern tastes prefer not to use it. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import numpy as np from detectron.core.config import cfg import detectron.utils.cython_bbox as cython_bbox import detectron.utils.cython_nms as cython_nms bbox_overlaps = cython_bbox.bbox_overlaps def boxes_area(boxes): """Compute the area of an array of boxes.""" w = (boxes[:, 2] - boxes[:, 0] + 1) h = (boxes[:, 3] - boxes[:, 1] + 1) areas = w * h assert np.all(areas >= 0), 'Negative areas founds' return areas def unique_boxes(boxes, scale=1.0): """Return indices of unique boxes.""" v = np.array([1, 1e3, 1e6, 1e9]) hashes = np.round(boxes * scale).dot(v) _, index = np.unique(hashes, return_index=True) return np.sort(index) def xywh_to_xyxy(xywh): """Convert [x1 y1 w h] box format to [x1 y1 x2 y2] format.""" if isinstance(xywh, (list, tuple)): # Single box given as a list of coordinates assert len(xywh) == 4 x1, y1 = xywh[0], xywh[1] x2 = x1 + np.maximum(0., xywh[2] - 1.) y2 = y1 + np.maximum(0., xywh[3] - 1.) 
return (x1, y1, x2, y2) elif isinstance(xywh, np.ndarray): # Multiple boxes given as a 2D ndarray return np.hstack( (xywh[:, 0:2], xywh[:, 0:2] + np.maximum(0, xywh[:, 2:4] - 1)) ) else: raise TypeError('Argument xywh must be a list, tuple, or numpy array.') def xyxy_to_xywh(xyxy): """Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format.""" if isinstance(xyxy, (list, tuple)): # Single box given as a list of coordinates assert len(xyxy) == 4 x1, y1 = xyxy[0], xyxy[1] w = xyxy[2] - x1 + 1 h = xyxy[3] - y1 + 1 return (x1, y1, w, h) elif isinstance(xyxy, np.ndarray): # Multiple boxes given as a 2D ndarray return np.hstack((xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1)) else: raise TypeError('Argument xyxy must be a list, tuple, or numpy array.') def filter_small_boxes(boxes, min_size): """Keep boxes with width and height both greater than min_size.""" w = boxes[:, 2] - boxes[:, 0] + 1 h = boxes[:, 3] - boxes[:, 1] + 1 keep = np.where((w > min_size) & (h > min_size))[0] return keep def clip_boxes_to_image(boxes, height, width): """Clip an array of boxes to an image with the given height and width.""" boxes[:, [0, 2]] = np.minimum(width - 1., np.maximum(0., boxes[:, [0, 2]])) boxes[:, [1, 3]] = np.minimum(height - 1., np.maximum(0., boxes[:, [1, 3]])) return boxes def clip_xyxy_to_image(x1, y1, x2, y2, height, width): """Clip coordinates to an image with the given height and width.""" x1 = np.minimum(width - 1., np.maximum(0., x1)) y1 = np.minimum(height - 1., np.maximum(0., y1)) x2 = np.minimum(width - 1., np.maximum(0., x2)) y2 = np.minimum(height - 1., np.maximum(0., y2)) return x1, y1, x2, y2 def clip_tiled_boxes(boxes, im_shape): """Clip boxes to image boundaries. 
im_shape is [height, width] and boxes has shape (N, 4 * num_tiled_boxes).""" assert boxes.shape[1] % 4 == 0, \ 'boxes.shape[1] is {:d}, but must be divisible by 4.'.format( boxes.shape[1] ) # x1 >= 0 boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) # y1 >= 0 boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) # x2 < im_shape[1] boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) # y2 < im_shape[0] boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) return boxes def bbox_transform(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)): """Forward transform that maps proposal boxes to predicted ground-truth boxes using bounding-box regression deltas. See bbox_transform_inv for a description of the weights argument. """ if boxes.shape[0] == 0: return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) boxes = boxes.astype(deltas.dtype, copy=False) widths = boxes[:, 2] - boxes[:, 0] + 1.0 heights = boxes[:, 3] - boxes[:, 1] + 1.0 ctr_x = boxes[:, 0] + 0.5 * widths ctr_y = boxes[:, 1] + 0.5 * heights wx, wy, ww, wh = weights dx = deltas[:, 0::4] / wx dy = deltas[:, 1::4] / wy dw = deltas[:, 2::4] / ww dh = deltas[:, 3::4] / wh # Prevent sending too large values into np.exp() dw = np.minimum(dw, cfg.BBOX_XFORM_CLIP) dh = np.minimum(dh, cfg.BBOX_XFORM_CLIP) pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] pred_w = np.exp(dw) * widths[:, np.newaxis] pred_h = np.exp(dh) * heights[:, np.newaxis] pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) # x1 pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # y1 pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 return pred_boxes def 
bbox_transform_inv(boxes, gt_boxes, weights=(1.0, 1.0, 1.0, 1.0)): """Inverse transform that computes target bounding-box regression deltas given proposal boxes and ground-truth boxes. The weights argument should be a 4-tuple of multiplicative weights that are applied to the regression target. In older versions of this code (and in py-faster-rcnn), the weights were set such that the regression deltas would have unit standard deviation on the training dataset. Presently, rather than computing these statistics exactly, we use a fixed set of weights (10., 10., 5., 5.) by default. These are approximately the weights one would get from COCO using the previous unit stdev heuristic. """ ex_widths = boxes[:, 2] - boxes[:, 0] + 1.0 ex_heights = boxes[:, 3] - boxes[:, 1] + 1.0 ex_ctr_x = boxes[:, 0] + 0.5 * ex_widths ex_ctr_y = boxes[:, 1] + 0.5 * ex_heights gt_widths = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0 gt_heights = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0 gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_widths gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_heights wx, wy, ww, wh = weights targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights targets_dw = ww * np.log(gt_widths / ex_widths) targets_dh = wh * np.log(gt_heights / ex_heights) targets = np.vstack((targets_dx, targets_dy, targets_dw, targets_dh)).transpose() return targets def expand_boxes(boxes, scale): """Expand an array of boxes by a given scale.""" w_half = (boxes[:, 2] - boxes[:, 0]) * .5 h_half = (boxes[:, 3] - boxes[:, 1]) * .5 x_c = (boxes[:, 2] + boxes[:, 0]) * .5 y_c = (boxes[:, 3] + boxes[:, 1]) * .5 w_half *= scale h_half *= scale boxes_exp = np.zeros(boxes.shape) boxes_exp[:, 0] = x_c - w_half boxes_exp[:, 2] = x_c + w_half boxes_exp[:, 1] = y_c - h_half boxes_exp[:, 3] = y_c + h_half return boxes_exp def flip_boxes(boxes, im_width): """Flip boxes horizontally.""" boxes_flipped = boxes.copy() boxes_flipped[:, 0::4] = im_width - boxes[:, 2::4] - 1 boxes_flipped[:, 2::4] = 
im_width - boxes[:, 0::4] - 1 return boxes_flipped def aspect_ratio(boxes, aspect_ratio): """Perform width-relative aspect ratio transformation.""" boxes_ar = boxes.copy() boxes_ar[:, 0::4] = aspect_ratio * boxes[:, 0::4] boxes_ar[:, 2::4] = aspect_ratio * boxes[:, 2::4] return boxes_ar def box_voting(top_dets, all_dets, thresh, scoring_method='ID', beta=1.0): """Apply bounding-box voting to refine `top_dets` by voting with `all_dets`. See: https://arxiv.org/abs/1505.01749. Optional score averaging (not in the referenced paper) can be applied by setting `scoring_method` appropriately. """ # top_dets is [N, 5] each row is [x1 y1 x2 y2, sore] # all_dets is [N, 5] each row is [x1 y1 x2 y2, sore] top_dets_out = top_dets.copy() top_boxes = top_dets[:, :4] all_boxes = all_dets[:, :4] all_scores = all_dets[:, 4] top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes) for k in range(top_dets_out.shape[0]): inds_to_vote = np.where(top_to_all_overlaps[k] >= thresh)[0] boxes_to_vote = all_boxes[inds_to_vote, :] ws = all_scores[inds_to_vote] top_dets_out[k, :4] = np.average(boxes_to_vote, axis=0, weights=ws) if scoring_method == 'ID': # Identity, nothing to do pass elif scoring_method == 'TEMP_AVG': # Average probabilities (considered as P(detected class) vs. # P(not the detected class)) after smoothing with a temperature # hyperparameter. 
P = np.vstack((ws, 1.0 - ws)) P_max = np.max(P, axis=0) X = np.log(P / P_max) X_exp = np.exp(X / beta) P_temp = X_exp / np.sum(X_exp, axis=0) P_avg = P_temp[0].mean() top_dets_out[k, 4] = P_avg elif scoring_method == 'AVG': # Combine new probs from overlapping boxes top_dets_out[k, 4] = ws.mean() elif scoring_method == 'IOU_AVG': P = ws ws = top_to_all_overlaps[k, inds_to_vote] P_avg = np.average(P, weights=ws) top_dets_out[k, 4] = P_avg elif scoring_method == 'GENERALIZED_AVG': P_avg = np.mean(ws**beta)**(1.0 / beta) top_dets_out[k, 4] = P_avg elif scoring_method == 'QUASI_SUM': top_dets_out[k, 4] = ws.sum() / float(len(ws))**beta else: raise NotImplementedError( 'Unknown scoring method {}'.format(scoring_method) ) return top_dets_out def nms(dets, thresh): """Apply classic DPM-style greedy NMS.""" if dets.shape[0] == 0: return [] return cython_nms.nms(dets, thresh) def soft_nms( dets, sigma=0.5, overlap_thresh=0.3, score_thresh=0.001, method='linear' ): """Apply the soft NMS algorithm from https://arxiv.org/abs/1704.04503.""" if dets.shape[0] == 0: return dets, [] methods = {'hard': 0, 'linear': 1, 'gaussian': 2} assert method in methods, 'Unknown soft_nms method: {}'.format(method) dets, keep = cython_nms.soft_nms( np.ascontiguousarray(dets, dtype=np.float32), np.float32(sigma), np.float32(overlap_thresh), np.float32(score_thresh), np.uint8(methods[method]) ) return dets, keep ================================================ FILE: detectron/utils/c2.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Helpful utilities for working with Caffe2.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from six import string_types import contextlib import subprocess from caffe2.proto import caffe2_pb2 from caffe2.python import core from caffe2.python import dyndep from caffe2.python import scope from caffe2.python import workspace import detectron.utils.env as envu def import_contrib_ops(): """Import contrib ops needed by Detectron.""" envu.import_nccl_ops() def import_detectron_ops(): """Import Detectron ops.""" detectron_ops_lib = envu.get_detectron_ops_lib() dyndep.InitOpsLibrary(detectron_ops_lib) def import_custom_ops(): """Import custom ops.""" custom_ops_lib = envu.get_custom_ops_lib() dyndep.InitOpsLibrary(custom_ops_lib) def SuffixNet(name, net, prefix_len, outputs): """Returns a new Net from the given Net (`net`) that includes only the ops after removing the first `prefix_len` number of ops. The new Net is thus a suffix of `net`. Blobs listed in `outputs` are registered as external output blobs. 
""" outputs = BlobReferenceList(outputs) for output in outputs: assert net.BlobIsDefined(output) new_net = net.Clone(name) del new_net.Proto().op[:] del new_net.Proto().external_input[:] del new_net.Proto().external_output[:] # Add suffix ops new_net.Proto().op.extend(net.Proto().op[prefix_len:]) # Add external input blobs # Treat any undefined blobs as external inputs input_names = [ i for op in new_net.Proto().op for i in op.input if not new_net.BlobIsDefined(i)] new_net.Proto().external_input.extend(input_names) # Add external output blobs output_names = [str(o) for o in outputs] new_net.Proto().external_output.extend(output_names) return new_net, [new_net.GetBlobRef(o) for o in output_names] def BlobReferenceList(blob_ref_or_list): """Ensure that the argument is returned as a list of BlobReferences.""" if isinstance(blob_ref_or_list, core.BlobReference): return [blob_ref_or_list] elif type(blob_ref_or_list) in (list, tuple): for b in blob_ref_or_list: assert isinstance(b, core.BlobReference) return blob_ref_or_list else: raise TypeError( 'blob_ref_or_list must be a BlobReference or a list/tuple of ' 'BlobReferences' ) def UnscopeName(possibly_scoped_name): """Remove any name scoping from a (possibly) scoped name. For example, convert the name 'gpu_0/foo' to 'foo'.""" assert isinstance(possibly_scoped_name, string_types) return possibly_scoped_name[ possibly_scoped_name.rfind(scope._NAMESCOPE_SEPARATOR) + 1:] @contextlib.contextmanager def NamedCudaScope(gpu_id): """Creates a GPU name scope and CUDA device scope. 
This function is provided to reduce `with ...` nesting levels.""" with GpuNameScope(gpu_id): with CudaScope(gpu_id): yield @contextlib.contextmanager def GpuNameScope(gpu_id): """Create a name scope for GPU device `gpu_id`.""" with core.NameScope('gpu_{:d}'.format(gpu_id)): yield @contextlib.contextmanager def CudaScope(gpu_id): """Create a CUDA device scope for GPU device `gpu_id`.""" gpu_dev = CudaDevice(gpu_id) with core.DeviceScope(gpu_dev): yield @contextlib.contextmanager def CpuScope(): """Create a CPU device scope.""" cpu_dev = core.DeviceOption(caffe2_pb2.CPU) with core.DeviceScope(cpu_dev): yield def CudaDevice(gpu_id): """Create a Cuda device.""" return core.DeviceOption(caffe2_pb2.CUDA, gpu_id) def gauss_fill(std): """Gaussian fill helper to reduce verbosity.""" return ('GaussianFill', {'std': std}) def const_fill(value): """Constant fill helper to reduce verbosity.""" return ('ConstantFill', {'value': value}) def get_nvidia_info(): return ( get_nvidia_smi_output(), workspace.GetCUDAVersion(), workspace.GetCuDNNVersion(), ) def get_nvidia_smi_output(): try: info = subprocess.check_output(["nvidia-smi"], stderr=subprocess.STDOUT) info = info.decode("utf8") except Exception as e: info = "Executing nvidia-smi failed: " + str(e) return info.strip() ================================================ FILE: detectron/utils/collections.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## """A simple attribute dictionary used for representing configuration options.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals class AttrDict(dict): IMMUTABLE = '__immutable__' def __init__(self, *args, **kwargs): super(AttrDict, self).__init__(*args, **kwargs) self.__dict__[AttrDict.IMMUTABLE] = False def __getattr__(self, name): if name in self.__dict__: return self.__dict__[name] elif name in self: return self[name] else: raise AttributeError(name) def __setattr__(self, name, value): if not self.__dict__[AttrDict.IMMUTABLE]: if name in self.__dict__: self.__dict__[name] = value else: self[name] = value else: raise AttributeError( 'Attempted to set "{}" to "{}", but AttrDict is immutable'. format(name, value) ) def immutable(self, is_immutable): """Set immutability to is_immutable and recursively apply the setting to all nested AttrDicts. """ self.__dict__[AttrDict.IMMUTABLE] = is_immutable # Recursively set immutable state for v in self.__dict__.values(): if isinstance(v, AttrDict): v.immutable(is_immutable) for v in self.values(): if isinstance(v, AttrDict): v.immutable(is_immutable) def is_immutable(self): return self.__dict__[AttrDict.IMMUTABLE] ================================================ FILE: detectron/utils/colormap.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """An awesome colormap for really neat visualizations.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import numpy as np def colormap(rgb=False): color_list = np.array( [ 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167, 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286, 
0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714, 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000 ] ).astype(np.float32) color_list = color_list.reshape((-1, 3)) * 255 if not rgb: color_list = color_list[:, ::-1] return color_list ================================================ FILE: detectron/utils/coordinator.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Coordinated access to a shared multithreading/processing queue.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import contextlib import logging import threading import traceback from six.moves import queue as Queue log = logging.getLogger(__name__) class Coordinator: def __init__(self): self._event = threading.Event() def request_stop(self): log.debug('Coordinator stopping') self._event.set() def should_stop(self): return self._event.is_set() def wait_for_stop(self): return self._event.wait() @contextlib.contextmanager def stop_on_exception(self): try: yield except Exception: if not self.should_stop(): traceback.print_exc() self.request_stop() def coordinated_get(coordinator, queue): while not coordinator.should_stop(): try: return queue.get(block=True, timeout=1.0) except Queue.Empty: continue raise Exception('Coordinator 
stopped during get()') def coordinated_put(coordinator, queue, element): while not coordinator.should_stop(): try: queue.put(element, block=True, timeout=1.0) return except Queue.Full: continue raise Exception('Coordinator stopped during put()') ================================================ FILE: detectron/utils/cython_bbox.pyx ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## # # Based on: # -------------------------------------------------------- # Fast R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Sergey Karayev # -------------------------------------------------------- cimport cython import numpy as np cimport numpy as np DTYPE = np.float32 ctypedef np.float32_t DTYPE_t @cython.boundscheck(False) def bbox_overlaps( np.ndarray[DTYPE_t, ndim=2] boxes, np.ndarray[DTYPE_t, ndim=2] query_boxes): """ Parameters ---------- boxes: (N, 4) ndarray of float query_boxes: (K, 4) ndarray of float Returns ------- overlaps: (N, K) ndarray of overlap between boxes and query_boxes """ cdef unsigned int N = boxes.shape[0] cdef unsigned int K = query_boxes.shape[0] cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) cdef DTYPE_t iw, ih, box_area cdef DTYPE_t ua cdef unsigned int k, n with nogil: for k in range(K): box_area = ( (query_boxes[k, 
2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) ) for n in range(N): iw = ( min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 ) if iw > 0: ih = ( min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 ) if ih > 0: ua = float( (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) + box_area - iw * ih ) overlaps[n, k] = iw * ih / ua return overlaps ================================================ FILE: detectron/utils/cython_nms.pyx ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## # # Based on: # -------------------------------------------------------- # Fast R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- cimport cython import numpy as np cimport numpy as np cdef inline np.float32_t max(np.float32_t a, np.float32_t b) nogil: return a if a >= b else b cdef inline np.float32_t min(np.float32_t a, np.float32_t b) nogil: return a if a <= b else b @cython.boundscheck(False) @cython.cdivision(True) @cython.wraparound(False) def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh): cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] cdef int ndets = dets.shape[0] cdef np.ndarray[np.int_t, ndim=1] suppressed = \ np.zeros((ndets), dtype=np.int) # nominal indices cdef int _i, _j # sorted indices cdef int i, j # temp variables for box i's (the box currently under consideration) cdef np.float32_t ix1, iy1, ix2, iy2, iarea # variables for computing overlap with box j (lower scoring box) cdef np.float32_t xx1, yy1, xx2, yy2 cdef np.float32_t w, h cdef np.float32_t inter, ovr with nogil: for _i in range(ndets): i = order[_i] if suppressed[i] == 1: continue ix1 = x1[i] iy1 = y1[i] ix2 = x2[i] iy2 = y2[i] iarea = areas[i] for _j in range(_i + 1, ndets): j = order[_j] if suppressed[j] == 1: continue xx1 = max(ix1, x1[j]) yy1 = max(iy1, y1[j]) xx2 = min(ix2, x2[j]) yy2 = min(iy2, y2[j]) w = max(0.0, xx2 - xx1 + 1) h = max(0.0, yy2 - yy1 + 1) inter = w * h ovr = inter / (iarea + 
areas[j] - inter) if ovr >= thresh: suppressed[j] = 1 return np.where(suppressed == 0)[0] # ---------------------------------------------------------- # Soft-NMS: Improving Object Detection With One Line of Code # Copyright (c) University of Maryland, College Park # Licensed under The MIT License [see LICENSE for details] # Written by Navaneeth Bodla and Bharat Singh # ---------------------------------------------------------- @cython.boundscheck(False) @cython.cdivision(True) @cython.wraparound(False) def soft_nms( np.ndarray[float, ndim=2] boxes_in, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0 ): boxes = boxes_in.copy() cdef unsigned int N = boxes.shape[0] cdef float iw, ih, box_area cdef float ua cdef int pos = 0 cdef float maxscore = 0 cdef int maxpos = 0 cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov inds = np.arange(N) for i in range(N): maxscore = boxes[i, 4] maxpos = i tx1 = boxes[i,0] ty1 = boxes[i,1] tx2 = boxes[i,2] ty2 = boxes[i,3] ts = boxes[i,4] ti = inds[i] pos = i + 1 # get max box while pos < N: if maxscore < boxes[pos, 4]: maxscore = boxes[pos, 4] maxpos = pos pos = pos + 1 # add max box as a detection boxes[i,0] = boxes[maxpos,0] boxes[i,1] = boxes[maxpos,1] boxes[i,2] = boxes[maxpos,2] boxes[i,3] = boxes[maxpos,3] boxes[i,4] = boxes[maxpos,4] inds[i] = inds[maxpos] # swap ith box with position of max box boxes[maxpos,0] = tx1 boxes[maxpos,1] = ty1 boxes[maxpos,2] = tx2 boxes[maxpos,3] = ty2 boxes[maxpos,4] = ts inds[maxpos] = ti tx1 = boxes[i,0] ty1 = boxes[i,1] tx2 = boxes[i,2] ty2 = boxes[i,3] ts = boxes[i,4] pos = i + 1 # NMS iterations, note that N changes if detection boxes fall below # threshold while pos < N: x1 = boxes[pos, 0] y1 = boxes[pos, 1] x2 = boxes[pos, 2] y2 = boxes[pos, 3] s = boxes[pos, 4] area = (x2 - x1 + 1) * (y2 - y1 + 1) iw = (min(tx2, x2) - max(tx1, x1) + 1) if iw > 0: ih = (min(ty2, y2) - max(ty1, y1) + 1) if ih > 0: ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area 
- iw * ih) ov = iw * ih / ua #iou between max box and detection box if method == 1: # linear if ov > Nt: weight = 1 - ov else: weight = 1 elif method == 2: # gaussian weight = np.exp(-(ov * ov)/sigma) else: # original NMS if ov > Nt: weight = 0 else: weight = 1 boxes[pos, 4] = weight*boxes[pos, 4] # if box score falls below threshold, discard the box by # swapping with last box update N if boxes[pos, 4] < threshold: boxes[pos,0] = boxes[N-1, 0] boxes[pos,1] = boxes[N-1, 1] boxes[pos,2] = boxes[N-1, 2] boxes[pos,3] = boxes[N-1, 3] boxes[pos,4] = boxes[N-1, 4] inds[pos] = inds[N-1] N = N - 1 pos = pos - 1 pos = pos + 1 return boxes[:N], inds[:N] ================================================ FILE: detectron/utils/env.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## """Environment helper functions.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import os import sys import yaml # Default value of the CMake install prefix _CMAKE_INSTALL_PREFIX = '/usr/local' # Detectron ops lib _DETECTRON_OPS_LIB = 'libcaffe2_detectron_ops_gpu.so' def get_runtime_dir(): """Retrieve the path to the runtime directory.""" return sys.path[0] def get_py_bin_ext(): """Retrieve python binary extension.""" return '.py' def set_up_matplotlib(): """Set matplotlib up.""" import matplotlib # Use a non-interactive backend matplotlib.use('Agg') def exit_on_error(): """Exit from a detectron tool when there's an error.""" sys.exit(1) def import_nccl_ops(): """Import NCCL ops.""" # There is no need to load NCCL ops since the # NCCL dependency is built into the Caffe2 gpu lib pass def get_detectron_ops_lib(): """Retrieve Detectron ops library.""" # Candidate prefixes for detectron ops lib path prefixes = [_CMAKE_INSTALL_PREFIX, sys.prefix, sys.exec_prefix] + sys.path # Candidate subdirs for detectron ops lib subdirs = ['lib', 'torch/lib'] # Try to find detectron ops lib for prefix in prefixes: for subdir in subdirs: ops_path = os.path.join(prefix, subdir, _DETECTRON_OPS_LIB) if os.path.exists(ops_path): print('Found Detectron ops lib: {}'.format(ops_path)) return ops_path raise Exception('Detectron ops lib not found') def get_custom_ops_lib(): """Retrieve custom ops library.""" det_dir, _ = os.path.split(os.path.dirname(__file__)) root_dir, _ = os.path.split(det_dir) custom_ops_lib = os.path.join( root_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so') assert os.path.exists(custom_ops_lib), \ 'Custom ops lib not found at \'{}\''.format(custom_ops_lib) return custom_ops_lib # YAML load/dump function aliases yaml_load = yaml.load yaml_dump = yaml.dump 
================================================ FILE: detectron/utils/image.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Image helper functions.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import cv2 import numpy as np def aspect_ratio_rel(im, aspect_ratio): """Performs width-relative aspect ratio transformation.""" im_h, im_w = im.shape[:2] im_ar_w = int(round(aspect_ratio * im_w)) im_ar = cv2.resize(im, dsize=(im_ar_w, im_h)) return im_ar def aspect_ratio_abs(im, aspect_ratio): """Performs absolute aspect ratio transformation.""" im_h, im_w = im.shape[:2] im_area = im_h * im_w im_ar_w = np.sqrt(im_area * aspect_ratio) im_ar_h = np.sqrt(im_area / aspect_ratio) assert np.isclose(im_ar_w / im_ar_h, aspect_ratio) im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h))) return im_ar ================================================ FILE: detectron/utils/io.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """IO utilities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import errno import hashlib import logging import os import re import six import sys from six.moves import cPickle as pickle from six.moves import urllib from uuid import uuid4 logger = logging.getLogger(__name__) _DETECTRON_S3_BASE_URL = 'https://dl.fbaipublicfiles.com/detectron' def save_object(obj, file_name, pickle_format=2): """Save a Python object by pickling it. Unless specifically overridden, we want to save it in Pickle format=2 since this will allow other Python2 executables to load the resulting Pickle. When we want to completely remove Python2 backward-compatibility, we can bump it up to 3. We should never use pickle.HIGHEST_PROTOCOL as far as possible if the resulting file is manifested or used, external to the system. """ file_name = os.path.abspath(file_name) # Avoid filesystem race conditions (particularly on network filesystems) # by saving to a random tmp file on the same filesystem, and then # atomically rename to the target filename. tmp_file_name = file_name + ".tmp." + uuid4().hex try: with open(tmp_file_name, 'wb') as f: pickle.dump(obj, f, pickle_format) f.flush() # make sure it's written to disk os.fsync(f.fileno()) os.rename(tmp_file_name, file_name) finally: # Clean up the temp file on failure. 
def cache_url(url_or_file, cache_dir):
    """Download the file specified by the URL to the cache_dir and return the
    path to the cached file. If the argument is not a URL, simply return it as
    is.

    Args:
        url_or_file: an http(s) URL below the Detectron S3 base URL, or any
            other string, which is returned unchanged.
        cache_dir: local directory that replaces the base URL prefix when
            building the path of the cached file.

    Returns:
        The path of the cached file, or `url_or_file` itself when it is not
        an http(s) URL.

    Raises:
        AssertionError: if the URL is not below the Detectron S3 base URL, or
            if the md5 hash of the cached file does not match the reference.
    """
    is_url = re.match(
        r'^(?:http)s?://', url_or_file, re.IGNORECASE
    ) is not None

    if not is_url:
        return url_or_file

    url = url_or_file
    assert url.startswith(_DETECTRON_S3_BASE_URL), \
        ('Detectron only automatically caches URLs in the Detectron S3 '
         'bucket: {}').format(_DETECTRON_S3_BASE_URL)

    cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir)
    if os.path.exists(cache_file_path):
        # Already cached: only verify the hash
        assert_cache_file_is_ok(url, cache_file_path)
        return cache_file_path

    cache_file_dir = os.path.dirname(cache_file_path)
    # Create the directory unconditionally and tolerate "already exists".
    # Checking for existence first is racy when several processes fill the
    # same cache concurrently: the directory can appear between the check and
    # the makedirs call.
    try:
        os.makedirs(cache_file_dir)
    except OSError as e:
        if e.errno != errno.EEXIST or not os.path.isdir(cache_file_dir):
            raise

    logger.info('Downloading remote file {} to {}'.format(url, cache_file_path))
    download_url(url, cache_file_path)
    assert_cache_file_is_ok(url, cache_file_path)
    return cache_file_path
format(bar, percents, total / 1024 / 1024) ) sys.stdout.flush() if count >= total: sys.stdout.write('\n') def download_url( url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar ): """Download url and write it to dst_file_path. Credit: https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook """ response = urllib.request.urlopen(url) if six.PY2: total_size = response.info().getheader('Content-Length').strip() else: total_size = response.info().get('Content-Length').strip() total_size = int(total_size) bytes_so_far = 0 with open(dst_file_path, 'wb') as f: while 1: chunk = response.read(chunk_size) bytes_so_far += len(chunk) if not chunk: break if progress_hook: progress_hook(bytes_so_far, total_size) f.write(chunk) return bytes_so_far def _get_file_md5sum(file_name): """Compute the md5 hash of a file.""" hash_obj = hashlib.md5() with open(file_name, 'rb') as f: hash_obj.update(f.read()) return hash_obj.hexdigest().encode('utf-8') def _get_reference_md5sum(url): """By convention the md5 hash for url is stored in url + '.md5sum'.""" url_md5sum = url + '.md5sum' md5sum = urllib.request.urlopen(url_md5sum).read().strip() return md5sum ================================================ FILE: detectron/utils/keypoints.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
def flip_keypoints(keypoints, keypoint_flip_map, keypoint_coords, width):
    """Mirror keypoint_coords horizontally.

    The list of keypoint names and the left -> right name map can be
    obtained from get_keypoints().

    Args:
        keypoints: list of keypoint names; a name's position in the list is
            its index along the last axis of `keypoint_coords`.
        keypoint_flip_map: dict pairing each left keypoint name with its
            right counterpart.
        keypoint_coords: array indexed as [instance, row, keypoint], where
            row 0 is read as x and row 2 as visibility.
        width: image width used to mirror the x coordinates.

    Returns:
        A flipped copy; `keypoint_coords` itself is not modified.
    """
    mirrored = keypoint_coords.copy()
    # Exchange the left/right channels, always reading from the untouched
    # input so that the two halves of a swap cannot clobber each other
    for left_name, right_name in keypoint_flip_map.items():
        left = keypoints.index(left_name)
        right = keypoints.index(right_name)
        mirrored[:, :, [left, right]] = keypoint_coords[:, :, [right, left]]

    # View onto the x row: writes below land in `mirrored`
    x_coords = mirrored[:, 0, :]
    x_coords[...] = width - x_coords - 1
    # Keep the COCO convention of zeroed coordinates for keypoints with
    # visibility == 0 (only x needs resetting; y was not changed above)
    x_coords[mirrored[:, 2, :] == 0] = 0
    return mirrored
offset_x = rois[:, 0] offset_y = rois[:, 1] widths = rois[:, 2] - rois[:, 0] heights = rois[:, 3] - rois[:, 1] widths = np.maximum(widths, 1) heights = np.maximum(heights, 1) widths_ceil = np.ceil(widths) heights_ceil = np.ceil(heights) # NCHW to NHWC for use with OpenCV maps = np.transpose(maps, [0, 2, 3, 1]) min_size = cfg.KRCNN.INFERENCE_MIN_SIZE xy_preds = np.zeros( (len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS), dtype=np.float32) for i in range(len(rois)): if min_size > 0: roi_map_width = int(np.maximum(widths_ceil[i], min_size)) roi_map_height = int(np.maximum(heights_ceil[i], min_size)) else: roi_map_width = widths_ceil[i] roi_map_height = heights_ceil[i] width_correction = widths[i] / roi_map_width height_correction = heights[i] / roi_map_height roi_map = cv2.resize( maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC) # Bring back to CHW roi_map = np.transpose(roi_map, [2, 0, 1]) roi_map_probs = scores_to_probs(roi_map.copy()) w = roi_map.shape[2] for k in range(cfg.KRCNN.NUM_KEYPOINTS): pos = roi_map[k, :, :].argmax() x_int = pos % w y_int = (pos - x_int) // w assert (roi_map_probs[k, y_int, x_int] == roi_map_probs[k, :, :].max()) x = (x_int + 0.5) * width_correction y = (y_int + 0.5) * height_correction xy_preds[i, 0, k] = x + offset_x[i] xy_preds[i, 1, k] = y + offset_y[i] xy_preds[i, 2, k] = roi_map[k, y_int, x_int] xy_preds[i, 3, k] = roi_map_probs[k, y_int, x_int] return xy_preds def keypoints_to_heatmap_labels(keypoints, rois): """Encode keypoint location in the target heatmap for use in SoftmaxWithLoss. """ # Maps keypoints from the half-open interval [x1, x2) on continuous image # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete image # coordinates. We use the continuous <-> discrete conversion from Heckbert # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5, # where d is a discrete coordinate and c is a continuous coordinate. 
def scores_to_probs(scores):
    """Transforms CxHxW of scores to probabilities spatially.

    A softmax is taken over the HxW locations of every channel
    independently, so each channel of the result sums to 1.

    Args:
        scores: CxHxW array of scores. It is overwritten in place; pass a
            copy if the raw scores are still needed.

    Returns:
        The same array object as `scores`, now holding the probabilities.
    """
    for c in range(scores.shape[0]):
        channel = scores[c, :, :]
        # Subtract the max for numerical stability and evaluate the
        # exponential once, reusing it for the numerator and the normalizer
        exp_scores = np.exp(channel - channel.max())
        scores[c, :, :] = exp_scores / np.sum(exp_scores)
    return scores
compute_oks(src_keypoints, src_roi, dst_keypoints, dst_roi): """Compute OKS for predicted keypoints wrt gt_keypoints. src_keypoints: 4xK src_roi: 4x1 dst_keypoints: Nx4xK dst_roi: Nx4 """ sigmas = np.array([ .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 vars = (sigmas * 2)**2 # area src_area = (src_roi[2] - src_roi[0] + 1) * (src_roi[3] - src_roi[1] + 1) # measure the per-keypoint distance if keypoints visible dx = dst_keypoints[:, 0, :] - src_keypoints[0, :] dy = dst_keypoints[:, 1, :] - src_keypoints[1, :] e = (dx**2 + dy**2) / vars / (src_area + np.spacing(1)) / 2 e = np.sum(np.exp(-e), axis=1) / e.shape[1] return e ================================================ FILE: detectron/utils/logging.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
def send_email(subject, body, to):
    """Send a plain-text email through the SMTP server on localhost.

    Args:
        subject: value of the Subject header.
        body: text of the message.
        to: recipient address; used both as the To header and as the
            address the message is delivered to.

    The sender address passed to the server is 'detectron'. The SMTP session
    is always terminated, even if sending fails, so that the connection is
    not leaked.
    """
    mime = MIMEText(body)
    mime['Subject'] = subject
    mime['To'] = to

    s = smtplib.SMTP('localhost')
    try:
        s.sendmail('detectron', to, mime.as_string())
    finally:
        try:
            s.quit()
        except smtplib.SMTPServerDisconnected:
            # The connection is already gone; there is nothing left to
            # release and a sendmail error (if any) must not be masked
            pass
def get_lr_at_iter(it):
    """Return the learning rate to use at iteration `it`.

    The rate comes from the policy function selected by get_lr_func(). While
    `it` is below cfg.SOLVER.WARM_UP_ITERS it is additionally scaled by a
    warm up factor chosen by cfg.SOLVER.WARM_UP_METHOD:
      'constant': cfg.SOLVER.WARM_UP_FACTOR
      'linear': interpolates from cfg.SOLVER.WARM_UP_FACTOR (at iteration 0)
                towards 1 as `it` approaches cfg.SOLVER.WARM_UP_ITERS

    Returns:
        The learning rate as np.float32.

    Raises:
        KeyError: if cfg.SOLVER.WARM_UP_METHOD is not one of the above.
    """
    solver = cfg.SOLVER
    policy_lr = get_lr_func()(it)

    # Past the warm up phase the policy value is used as is
    if it >= solver.WARM_UP_ITERS:
        return np.float32(policy_lr)

    warm_up_method = solver.WARM_UP_METHOD
    if warm_up_method == 'linear':
        progress = it / solver.WARM_UP_ITERS
        scale = solver.WARM_UP_FACTOR * (1 - progress) + progress
    elif warm_up_method == 'constant':
        scale = solver.WARM_UP_FACTOR
    else:
        raise KeyError(
            'Unknown SOLVER.WARM_UP_METHOD: {}'.format(warm_up_method))
    return np.float32(policy_lr * scale)
def lr_func_cosine_decay(cur_iter):
    """For cfg.SOLVER.LR_POLICY = 'cosine_decay'

    Scale cfg.SOLVER.BASE_LR by half a cosine period: the factor is 1 at
    iteration 0 and reaches 0 at cfg.SOLVER.MAX_ITER.
    """
    # Fraction of the schedule completed so far
    progress = float(cur_iter) / cfg.SOLVER.MAX_ITER
    return cfg.SOLVER.BASE_LR * (0.5 * (np.cos(np.pi * progress) + 1))
def get_lr_func():
    """Look up the learning rate policy function selected by the config.

    The function is the module-level global named
    'lr_func_' + cfg.SOLVER.LR_POLICY.

    Raises:
        NotImplementedError: if no global of that name exists.
    """
    namespace = globals()
    func_name = 'lr_func_' + cfg.SOLVER.LR_POLICY
    if func_name in namespace:
        return namespace[func_name]
    raise NotImplementedError(
        'Unknown LR policy: {}'.format(cfg.SOLVER.LR_POLICY))
class OpFilter:
    ''' Predicate over operators, configured through keyword arguments.

    Supported criteria (a criterion left at a falsy value is not applied):
        type:       op.type must equal this value
        type_in:    op.type must be contained in this collection
        inputs:     set(op.input) must equal set(inputs)
        outputs:    set(op.output) must equal set(outputs)
        input_has:  this value must be in op.input
        output_has: this value must be in op.output
        cond:       applied unless None; the check fails if it is falsy
        reverse:    if set, the result of check() is inverted
    '''

    def __init__(self, **kwargs):
        defaults = (
            ('type', None),
            ('type_in', None),
            ('inputs', None),
            ('outputs', None),
            ('input_has', None),
            ('output_has', None),
            ('cond', None),
            ('reverse', False),
        )
        for attr_name, default in defaults:
            setattr(self, attr_name, default)

        # Only the criteria listed above may be overridden
        assert all([x in self.__dict__ for x in kwargs])
        self.__dict__.update(kwargs)

    def check(self, op):
        ''' Returns `not reverse` if op satisfies every configured criterion,
        `reverse` otherwise.
        '''
        # The first criterion that is set and violated rejects the op
        rejected = (
            (self.type and op.type != self.type)
            or (self.type_in and op.type not in self.type_in)
            or (self.inputs and set(op.input) != set(self.inputs))
            or (self.outputs and set(op.output) != set(self.outputs))
            or (self.input_has and self.input_has not in op.input)
            or (self.output_has and self.output_has not in op.output)
            or (self.cond is not None and not self.cond)
        )
        if rejected:
            return self.reverse
        return not self.reverse
def blob_uses(net, blob):
    ''' Returns the indices (into net.op, in order) of the ops that list
    `blob` among their inputs or control inputs.
    '''
    return [
        op_idx
        for op_idx, candidate in enumerate(net.op)
        if blob in candidate.input or blob in candidate.control_input
    ]
def fuse_affine(net, params, ignore_failure):
    ''' Apply fuse_first_affine repeatedly until it no longer changes the
    number of ops in the net (fixed point).

    Args:
        net: net whose `op` list is to be fused.
        params: blob dictionary handed to fuse_first_affine.
        ignore_failure: if false, raise when an AffineChannel op is still
            present once no further fusion happens.

    Returns:
        A (net, params, removed_tensors) tuple as produced by the last call
        to fuse_first_affine.

    Raises:
        Exception: if an AffineChannel op remains after fusion and
            ignore_failure is false.
    '''
    removed_tensors = []
    while True:
        next_net, next_params, removed_tensors = \
            fuse_first_affine(net, params, removed_tensors)
        if len(next_net.op) == len(net.op):
            # Nothing was fused in this round: fixed point reached
            if (
                any(op.type == "AffineChannel" for op in next_net.op) and
                not ignore_failure
            ):
                # Format the message explicitly; Exception does not
                # interpolate logging-style arguments
                raise Exception(
                    "Model contains AffineChannel op after fusion: "
                    "{}".format(next_net))
            return (next_net, next_params, removed_tensors)
        net, params = next_net, next_params
def add_tensor(net, name, blob):
    ''' Append to `net` an operator that fills blob `name` with the contents
    of the tensor `blob`. The operator is only added to net.op; it is not
    run here.

    float32, int32 and int64 tensors are stored with their shape and values.
    uint8 is stored as an array of string with one element (the raw bytes of
    the tensor).

    Raises:
        KeyError: if blob.dtype is none of float32, int32, int64, uint8.
    '''
    kTypeNameMapper = {
        np.dtype('float32'): "GivenTensorFill",
        np.dtype('int32'): "GivenTensorIntFill",
        np.dtype('int64'): "GivenTensorInt64Fill",
        np.dtype('uint8'): "GivenTensorStringFill",
    }

    shape = blob.shape
    values = blob
    # pass array of uint8 as a string to save storage
    # storing uint8_t has a large overhead for now
    if blob.dtype == np.dtype('uint8'):
        shape = [1]
        # tobytes() yields the raw buffer contents on both Python 2 and 3;
        # str(blob.data) only did so on Python 2 (on Python 3 it is the repr
        # of a memoryview object)
        values = [blob.tobytes()]

    op = core.CreateOperator(
        kTypeNameMapper[blob.dtype],
        [], [name],
        shape=shape,
        values=values,
    )
    net.op.extend([op])
def compare_model(model1_func, model2_func, test_image, check_blobs):
    ''' Run two models on the same input and check that selected blobs agree
    to 3 decimals.

    Each model function is called as model_func(test_image, check_blobs) and
    must return a mapping from blob name to numpy array.

    check_blobs is either a sequence of blob names (the same name is looked
    up in both results) or a dict mapping a blob name in the first result to
    the corresponding blob name in the second result.

    A blob that is missing from both results is skipped.

    Returns:
        True if every checked blob matches.

    Raises:
        AssertionError: if a blob is present in only one of the results, if
            the shapes differ, or if the values differ.
    '''
    if isinstance(check_blobs, dict):
        # Materialize the pairs up front: dict views cannot be indexed on
        # Python 3
        blob_pairs = list(check_blobs.items())
    else:
        blob_pairs = [(name, name) for name in check_blobs]

    print('Running the first model...')
    res1 = model1_func(test_image, check_blobs)
    print('Running the second model...')
    res2 = model2_func(test_image, check_blobs)

    for n1, n2 in blob_pairs:
        print('Checking {} -> {}...'.format(n1, n2))
        r1 = res1[n1] if n1 in res1 else None
        r2 = res2[n2] if n2 in res2 else None
        assert r1 is not None or r2 is None, \
            "Blob {} in model1 is None".format(n1)
        assert r2 is not None or r1 is None, \
            "Blob {} in model2 is None".format(n2)
        if r1 is None:
            # Absent from both models (the asserts above rule out a
            # one-sided miss): there is nothing to compare
            continue
        assert r1.shape == r2.shape, \
            "Blob {} and {} shape mismatched: {} vs {}".format(
                n1, n2, r1.shape, r2.shape)

        np.testing.assert_array_almost_equal(
            r1, r2, decimal=3,
            err_msg='{} and {} not matched. Max diff: {}'.format(
                n1, n2, np.amax(np.absolute(r1 - r2))))

    return True
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    Args:
        model: model whose `params` name the blobs to initialize.
        weights_file: path handed to `load_object`; the loaded object is
            used as a dictionary that may carry a 'cfg' entry and a 'blobs'
            entry (older files are the blob dictionary itself).
        gpu_id: logical GPU on which the parameter blobs are fed.

    Raises:
        AssertionError: if a destination blob already exists in the
            workspace with a shape different from the loaded blob.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    # Snapshot of the blob names currently in the workspace; used below to
    # decide whether a shape check against an existing blob is needed.
    ws_blobs = workspace.Blobs()
    src_blobs = load_object(weights_file)
    if 'cfg' in src_blobs:
        # The weights file carries the config it was trained with; use it to
        # apply the bbox regression weight compatibility handling.
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        # The OrderedDict acts as an ordered set: the same unscoped name is
        # recorded once even if it appears under several device scopes.
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0 and
                    unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                # Nothing to load for this parameter; whatever value it
                # already has in the workspace is left untouched.
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.info(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name, src_blobs[src_name]
                    .shape
                )
            )
            if dst_name in ws_blobs:
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name,
                        ws_blob.shape,
                        src_blobs[src_name].shape)
            # Blobs are fed as float32; copy=False avoids a copy when the
            # loaded array already has that dtype.
            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(
                        np.float32, copy=False))

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for src_name in src_blobs.keys():
        # Momentum blobs and None entries are never preserved.
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob(
                    '__preserve__/{:s}'.format(src_name), src_blobs[src_name])
                logger.info(
                    '{:s} preserved in workspace (unused)'.format(src_name))
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.

    Both `model.params` and the '_momentum' companions of
    `model.TrainableParams()` are broadcast. With a single GPU this is a
    no-op.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        # `all_blobs` is expected to hold the same number of blobs for every
        # GPU, laid out GPU after GPU, so a stride of `blobs_per_gpu` walks
        # the copies of one blob across devices.
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = len(all_blobs) // cfg.NUM_GPUS
        for offset in range(blobs_per_gpu):
            replicas = list(all_blobs[offset::blobs_per_gpu])
            source = replicas[0]
            data = workspace.FetchBlob(source)
            logger.debug('Broadcasting {} to'.format(str(source)))
            # replicas[k] lives on GPU k; GPU 0 is the source.
            for gpu_id, target in enumerate(replicas[1:], 1):
                logger.debug(' |-> {}'.format(str(target)))
                with c2_utils.CudaScope(gpu_id):
                    workspace.FeedBlob(target, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
def configure_bbox_reg_weights(model, saved_cfg):
    """Compatibility shim for old models trained with bounding box regression
    mean/std normalization (instead of fixed weights).

    When `saved_cfg` has no MODEL.BBOX_REG_WEIGHTS entry, the global config is
    forced to MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.) and the model must not
    be in training mode. When the entry exists, nothing happens.

    Raises:
        AssertionError: if such an old model is being loaded for training.
    """
    if 'MODEL' in saved_cfg and 'BBOX_REG_WEIGHTS' in saved_cfg.MODEL:
        # Weights file already records the regression weights: nothing to do.
        return

    logger.warning('Model from weights file was trained before config key '
                   'MODEL.BBOX_REG_WEIGHTS was added. Forcing '
                   'MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.) to ensure '
                   'correct **inference** behavior.')
    # Generally we don't allow modifying the config, but this is a one-off
    # hack to support some very old models
    was_immutable = cfg.is_immutable()
    cfg.immutable(False)
    cfg.MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.)
    # Put the immutability flag back exactly as it was found.
    cfg.immutable(was_immutable)
    logger.info('New config:')
    logger.info(pprint.pformat(cfg))
    assert not model.train, (
        'This model was trained with an older version of the code that '
        'used bounding box regression mean/std normalization. It can no '
        'longer be used for training. To upgrade it to a trainable model '
        'please use fb/compat/convert_bbox_reg_normalized_model.py.'
    )
def flip_segms(segms, height, width):
    """Left/right flip each mask in a list of masks.

    Polygon segmentations (as decided by `is_poly`) are flipped coordinate by
    coordinate; everything else is treated as RLE and flipped by decoding,
    mirroring the columns and re-encoding.
    """
    def _mirror_polygon(coords, width):
        # Even positions hold x coordinates; mirror them around the image.
        mirrored = np.array(coords)
        mirrored[0::2] = width - np.array(coords[0::2]) - 1
        return mirrored.tolist()

    def _mirror_rle(rle, height, width):
        if 'counts' in rle and type(rle['counts']) == list:
            # Magic RLE format handling painfully discovered by looking at the
            # COCO API showAnns function.
            rle = mask_util.frPyObjects([rle], height, width)
        decoded = mask_util.decode(rle)
        decoded = decoded[:, ::-1, :]
        return mask_util.encode(
            np.array(decoded, order='F', dtype=np.uint8))

    return [
        [_mirror_polygon(poly, width) for poly in segm]  # Polygon format
        if is_poly(segm)
        else _mirror_rle(segm, height, width)  # RLE format
        for segm in segms
    ]
def rle_mask_voting(
    top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'
):
    """Returns new masks (in correspondence with `top_masks`) by combining
    multiple overlapping masks coming from the pool of `all_masks`. Two methods
    for combining masks are supported: 'AVG' uses a weighted average of
    overlapping mask pixels; 'UNION' takes the union of all mask pixels.

    Args:
        top_masks: RLE masks to refine.
        all_masks: pool of RLE masks that may vote.
        all_dets: array with one row per mask in `all_masks`; columns 0-3 are
            read as the box and column 4 as the score.
        iou_thresh: minimum IoU with a top mask for a pool mask to vote.
        binarize_thresh: threshold applied to the averaged mask ('AVG' only).
        method: 'AVG' or 'UNION'.

    Returns:
        A list with one RLE mask per entry of `top_masks`. An empty
        `top_masks` yields an empty list (previously None was returned, which
        was inconsistent with the list returned on every other path).

    Raises:
        NotImplementedError: if `method` is unknown and a mask needs voting.
    """
    if len(top_masks) == 0:
        return []

    all_not_crowd = [False] * len(all_masks)
    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
    decoded_all_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks
    ]
    decoded_top_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
    ]
    all_boxes = all_dets[:, :4].astype(np.int32)
    all_scores = all_dets[:, 4]

    # Fill box support with weights
    mask_shape = decoded_all_masks[0].shape
    mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
    for k in range(len(all_masks)):
        ref_box = all_boxes[k]
        # Clip the (inclusive) box to the mask extent
        x_0 = max(ref_box[0], 0)
        x_1 = min(ref_box[2] + 1, mask_shape[1])
        y_0 = max(ref_box[1], 0)
        y_1 = min(ref_box[3] + 1, mask_shape[0])
        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
    # Keep every weight strictly positive so the weighted average is defined
    mask_weights = np.maximum(mask_weights, 1e-5)

    top_segms_out = []
    for k in range(len(top_masks)):
        # Corner case of empty mask
        if decoded_top_masks[k].sum() == 0:
            top_segms_out.append(top_masks[k])
            continue

        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
        # Only matches itself
        if len(inds_to_vote) == 1:
            top_segms_out.append(top_masks[k])
            continue

        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
        if method == 'AVG':
            ws = mask_weights[inds_to_vote]
            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # Any pixel that's on joins the mask
            soft_mask = np.sum(masks_to_vote, axis=0)
            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))
        rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
        top_segms_out.append(rle)

    return top_segms_out
def rle_masks_to_boxes(masks):
    """Computes the bounding box of each mask in a list of RLE encoded masks.

    Args:
        masks: list of RLE encoded masks.

    Returns:
        A pair `(boxes, keep_inds)`. `boxes` is a float array with one
        (x0, y0, x1, y1) row per input mask; rows of empty masks are left as
        zeros. `keep_inds` holds the indices of the non-empty masks.

        An empty `masks` list yields an empty `(0, 4)` box array and an empty
        index array, so callers can always unpack the result (a bare `[]` used
        to be returned in that case).
    """
    if len(masks) == 0:
        return np.zeros((0, 4)), np.zeros((0,), dtype=np.intp)

    decoded_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks
    ]

    def get_bounds(flat_mask):
        # First and last position at which the projected mask is non-zero
        inds = np.where(flat_mask > 0)[0]
        return inds.min(), inds.max()

    boxes = np.zeros((len(decoded_masks), 4))
    keep = [True] * len(decoded_masks)
    for i, mask in enumerate(decoded_masks):
        if mask.sum() == 0:
            # An empty mask has no extent; flag it so callers can drop it
            keep[i] = False
            continue
        # Project onto the x axis, then onto the y axis
        x0, x1 = get_bounds(mask.sum(axis=0))
        y0, y1 = get_bounds(mask.sum(axis=1))
        boxes[i, :] = (x0, y0, x1, y1)

    return boxes, np.where(keep)[0]
def process_in_parallel(
    tag, total_range_size, binary, output_dir, opts=''
):
    """Run the specified binary cfg.NUM_GPUS times in parallel, each time as a
    subprocess that uses one GPU. The binary must accept the command line
    arguments `--range {start} {end}` that specify a data processing range.

    Args:
        tag: prefix used for the config snapshot, stdout and result files.
        total_range_size: number of items; split into cfg.NUM_GPUS subranges.
        binary: executable to launch (shell-quoted before use).
        output_dir: directory for the config snapshot, logs and result files.
        opts: iterable of extra command line tokens; each one is shell-quoted.

    Returns:
        A list with the object loaded from each subprocess's
        `{tag}_range_{start}_{end}.pkl` file, in subrange order.

    Raises:
        AssertionError: if CUDA_VISIBLE_DEVICES contains -1, or (via
            `log_subprocess_output`) if a subprocess exits with an error.
    """
    # Snapshot the current cfg state in order to pass to the inference
    # subprocesses
    cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag))
    with open(cfg_file, 'w') as f:
        envu.yaml_dump(cfg, stream=f)
    subprocess_env = os.environ.copy()
    processes = []
    subinds = np.array_split(range(total_range_size), cfg.NUM_GPUS)
    # Determine GPUs to use
    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if cuda_visible_devices:
        # Materialize the indices as a list: on Python 3 `map` returns a
        # one-shot iterator, which the membership test below would exhaust,
        # leaving nothing for the launch loop to iterate over.
        gpu_inds = [int(ind) for ind in cuda_visible_devices.split(',')]
        assert -1 not in gpu_inds, \
            'Hiding GPU indices using the \'-1\' index is not supported'
    else:
        gpu_inds = range(cfg.NUM_GPUS)
    # Run the binary in cfg.NUM_GPUS subprocesses
    for i, gpu_ind in enumerate(gpu_inds):
        start = subinds[i][0]
        end = subinds[i][-1] + 1
        # Each subprocess sees exactly one GPU
        subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind)
        cmd = '{binary} --range {start} {end} --cfg {cfg_file} NUM_GPUS 1 {opts}'
        cmd = cmd.format(
            binary=shlex_quote(binary),
            start=int(start),
            end=int(end),
            cfg_file=shlex_quote(cfg_file),
            opts=' '.join([shlex_quote(opt) for opt in opts])
        )
        logger.info('{} range command {}: {}'.format(tag, i, cmd))
        if i == 0:
            # The first subprocess is piped so its output can be streamed
            subprocess_stdout = subprocess.PIPE
        else:
            filename = os.path.join(
                output_dir, '%s_range_%s_%s.stdout' % (tag, start, end)
            )
            subprocess_stdout = open(filename, 'w')  # NOQA (close below)
        p = subprocess.Popen(
            cmd,
            shell=True,
            env=subprocess_env,
            stdout=subprocess_stdout,
            stderr=subprocess.STDOUT,
            bufsize=1
        )
        processes.append((i, p, start, end, subprocess_stdout))
    # Log output from inference processes and collate their results
    outputs = []
    for i, p, start, end, subprocess_stdout in processes:
        log_subprocess_output(i, p, output_dir, tag, start, end)
        if i > 0:
            subprocess_stdout.close()
        range_file = os.path.join(
            output_dir, '%s_range_%s_%s.pkl' % (tag, start, end)
        )
        range_data = load_object(range_file)
        outputs.append(range_data)
    return outputs
process. The first subprocess's output is logged in realtime. The output from the other subprocesses is buffered and then printed all at once (in order) when subprocesses finish. """ outfile = os.path.join( output_dir, '%s_range_%s_%s.stdout' % (tag, start, end) ) logger.info('# ' + '-' * 76 + ' #') logger.info( 'stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end) ) logger.info('# ' + '-' * 76 + ' #') if i == 0: # Stream the piped stdout from the first subprocess in realtime with open(outfile, 'wb') as f: for line in iter(p.stdout.readline, b''): print(line.rstrip().decode("utf8")) f.write(line) p.stdout.close() ret = p.wait() else: # For subprocesses >= 1, wait and dump their log file ret = p.wait() with open(outfile, 'r') as f: print(''.join(f.readlines())) assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret) ================================================ FILE: detectron/utils/timer.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
class Timer:
    """A simple stopwatch that also tracks a running average.

    Call `tic()` to start a measurement and `toc()` to end it. Attributes:
    `total_time` (sum of all measured intervals), `calls` (number of `toc`
    calls), `start_time` (timestamp of the last `tic`), `diff` (last measured
    interval) and `average_time` (`total_time / calls`).
    """

    def __init__(self):
        self.reset()

    def tic(self):
        # time.time is used instead of time.clock because time.clock does
        # not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Stop the current measurement.

        Returns the running average when `average` is true, otherwise the
        duration of the interval that just ended.
        """
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time += elapsed
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff

    def reset(self):
        """Zero every counter and timestamp."""
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.
def train_model():
    """Model training loop.

    Builds the model via `create_model`, runs SGD iterations from the resume
    iteration up to cfg.SOLVER.MAX_ITER, periodically writes checkpoints and
    finally writes 'model_final.pkl'.

    Returns:
        The checkpoints dictionary mapping an iteration number (or the key
        'final') to the path of the corresponding weights file.

    Raises:
        Exception: via `handle_critical_error` when the data loader has
            stopped or the total loss becomes NaN.
    """
    model, weights_file, start_iter, checkpoints, output_dir = create_model()
    if 'final' in checkpoints:
        # The final model was found in the output directory, so nothing to do
        return checkpoints

    setup_model_for_training(model, weights_file, output_dir)
    training_stats = TrainingStats(model)
    # The snapshot period from the config is divided by the number of GPUs
    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)

    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        # Abort before running the net if the data loader is no longer alive
        if model.roi_data_loader.has_stopped():
            handle_critical_error(model, 'roi_data_loader failed')
        training_stats.IterTic()
        lr = model.UpdateWorkspaceLr(cur_iter, lr_policy.get_lr_at_iter(cur_iter))
        workspace.RunNet(model.net.Proto().name)
        if cur_iter == start_iter:
            # Print the network once, after the first executed iteration
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats()
        training_stats.LogIterStats(cur_iter, lr)

        # No checkpoint is written on the very first iteration of this run
        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:
            checkpoints[cur_iter] = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter)
            )
            nu.save_model_to_weights_file(checkpoints[cur_iter], model)

        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Reset the iteration timer to remove outliers from the first few
            # SGD iterations
            training_stats.ResetIterTimer()

        if np.isnan(training_stats.iter_total_loss):
            handle_critical_error(model, 'Loss is NaN')

    # Save the final model
    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')
    nu.save_model_to_weights_file(checkpoints['final'], model)
    # Shutdown data loading threads
    model.roi_data_loader.shutdown()
    return checkpoints
def optimize_memory(model):
    """Save GPU memory through blob sharing.

    For every GPU in `range(cfg.NUM_GPUS)` the training net is rewritten by
    `memonger.share_grad_blobs`, using that GPU's scoped loss blobs and the
    model's parameter gradients. Activation sharing is controlled by
    cfg.MEMONGER_SHARE_ACTIVATIONS.
    """
    for gpu_id in range(cfg.NUM_GPUS):
        scope_prefix = 'gpu_{}/'.format(gpu_id)
        scoped_losses = [scope_prefix + loss for loss in model.losses]
        param_grads = set(model.param_to_grad.values())
        model.net._net = memonger.share_grad_blobs(
            model.net,
            scoped_losses,
            param_grads,
            scope_prefix,
            share_activations=cfg.MEMONGER_SHARE_ACTIVATIONS
        )
def dump_proto_files(model, output_dir):
    """Save prototxt descriptions of the training network and parameter
    initialization network.

    Writes `str(model.net.Proto())` to 'net.pbtxt' and
    `str(model.param_init_net.Proto())` to 'param_init_net.pbtxt', both inside
    `output_dir`.
    """
    # (file name, model attribute) pairs, written in this order
    targets = (
        ('net.pbtxt', 'net'),
        ('param_init_net.pbtxt', 'param_init_net'),
    )
    for file_name, attr_name in targets:
        with open(os.path.join(output_dir, file_name), 'w') as fid:
            fid.write(str(getattr(model, attr_name).Proto()))
class TrainingStats:
    """Track vital training statistics.

    Keeps the latest per-iteration value and a smoothed (median filtered)
    history for every loss and metric of the model, together with the total
    loss, the minibatch queue size and an iteration timer.
    """

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        tracked_keys = model.losses + model.metrics
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ) for key in tracked_keys
        }
        self.losses_and_metrics = {key: 0 for key in tracked_keys}
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        # NaN until the first call to UpdateIterStats
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        """Start timing an iteration."""
        self.iter_timer.tic()

    def IterToc(self):
        """Stop timing an iteration and return its duration."""
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        """Discard everything the iteration timer has accumulated."""
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        current = self.losses_and_metrics
        for name in current:
            # Losses are summed over GPUs, metrics are averaged
            if name in self.model.losses:
                current[name] = nu.sum_multi_gpu_blob(name)
            else:
                current[name] = nu.average_multi_gpu_blob(name)
        for name, smoother in self.smoothed_losses_and_metrics.items():
            smoother.AddValue(current[name])
        per_loss_values = np.array(
            [current[name] for name in self.model.losses]
        )
        self.iter_total_loss = np.sum(per_loss_values)
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        queue_size = self.model.roi_data_loader._minibatch_queue.qsize()
        self.smoothed_mb_qsize.AddValue(queue_size)

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        # Log every LOG_PERIOD iterations and on the last iteration
        if (cur_iter % self.LOG_PERIOD != 0 and
                cur_iter != cfg.SOLVER.MAX_ITER - 1):
            return
        log_json_stats(self.GetStats(cur_iter, lr))

    def GetStats(self, cur_iter, lr):
        """Assemble the dictionary of statistics reported for `cur_iter`."""
        avg_iter_time = self.iter_timer.average_time
        iters_left = cfg.SOLVER.MAX_ITER - cur_iter
        eta = str(datetime.timedelta(seconds=int(avg_iter_time * iters_left)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        # Largest value among the GPUs in use
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            mb_qsize=int(
                np.round(self.smoothed_mb_qsize.GetMedianValue())
            ),
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for name, smoother in self.smoothed_losses_and_metrics.items():
            stats[name] = smoother.GetMedianValue()
        return stats
def kp_connections(keypoints):
    """Return the skeleton edges as pairs of indices into `keypoints`.

    `keypoints` is a sequence of keypoint names. Each returned entry is a
    two-element list [start_index, end_index] for one limb, looked up with
    keypoints.index(), so a missing name raises ValueError.
    """
    named_edges = (
        ('left_eye', 'right_eye'),
        ('left_eye', 'nose'),
        ('right_eye', 'nose'),
        ('right_eye', 'right_ear'),
        ('left_eye', 'left_ear'),
        ('right_shoulder', 'right_elbow'),
        ('right_elbow', 'right_wrist'),
        ('left_shoulder', 'left_elbow'),
        ('left_elbow', 'left_wrist'),
        ('right_hip', 'right_knee'),
        ('right_knee', 'right_ankle'),
        ('left_hip', 'left_knee'),
        ('left_knee', 'left_ankle'),
        ('right_shoulder', 'left_shoulder'),
        ('right_hip', 'left_hip'),
    )
    return [
        [keypoints.index(start), keypoints.index(end)]
        for start, end in named_edges
    ]
def get_class_string(class_index, score, dataset):
    """Return the label text for one detection: '<class> <score>'.

    Args:
        class_index: integer class id of the detection.
        score: detection score, formatted with two decimals.
        dataset: object exposing a `classes` sequence indexed by class id,
            or None, in which case the class is rendered as 'id<index>'.

    The leading zero of the score is dropped (0.87 -> '.87'). Previously
    lstrip('0') was applied to a string that started with a space, so it
    never removed anything.
    """
    if dataset is not None:
        class_text = dataset.classes[class_index]
    else:
        class_text = 'id{:d}'.format(class_index)
    # Strip the zero from the number itself, then add the separating space.
    score_text = '{:0.2f}'.format(score).lstrip('0')
    return class_text + ' ' + score_text
kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob). """ dataset_keypoints, _ = keypoint_utils.get_keypoints() kp_lines = kp_connections(dataset_keypoints) # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)] colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] # Perform the drawing on a copy of the image, to allow for blending. kp_mask = np.copy(img) # Draw mid shoulder / mid hip first for better visualization. mid_shoulder = ( kps[:2, dataset_keypoints.index('right_shoulder')] + kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0 sc_mid_shoulder = np.minimum( kps[2, dataset_keypoints.index('right_shoulder')], kps[2, dataset_keypoints.index('left_shoulder')]) mid_hip = ( kps[:2, dataset_keypoints.index('right_hip')] + kps[:2, dataset_keypoints.index('left_hip')]) / 2.0 sc_mid_hip = np.minimum( kps[2, dataset_keypoints.index('right_hip')], kps[2, dataset_keypoints.index('left_hip')]) nose_idx = dataset_keypoints.index('nose') if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh: cv2.line( kp_mask, tuple(mid_shoulder), tuple(kps[:2, nose_idx]), color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA) if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh: cv2.line( kp_mask, tuple(mid_shoulder), tuple(mid_hip), color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA) # Draw the keypoints. for l in range(len(kp_lines)): i1 = kp_lines[l][0] i2 = kp_lines[l][1] p1 = kps[0, i1], kps[1, i1] p2 = kps[0, i2], kps[1, i2] if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: cv2.line( kp_mask, p1, p2, color=colors[l], thickness=2, lineType=cv2.LINE_AA) if kps[2, i1] > kp_thresh: cv2.circle( kp_mask, p1, radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) if kps[2, i2] > kp_thresh: cv2.circle( kp_mask, p2, radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) # Blend the keypoints. 
def vis_one_image_opencv(
        im, boxes, segms=None, keypoints=None, thresh=0.9, kp_thresh=2,
        show_box=False, dataset=None, show_class=False):
    """Constructs a numpy array with the detections visualized.

    Args:
        im: image to draw on.
        boxes: either the per-class list format produced by the testing code
            (converted with convert_from_cls_format) or an array whose rows
            hold (x0, y0, x1, y1, score).
        segms: optional segmentations, decoded with pycocotools.
        keypoints: optional per-detection keypoints.
        thresh: detections scoring below this value are not drawn.
        kp_thresh: threshold forwarded to vis_keypoints.
        show_box: draw the bounding box of each kept detection.
        dataset: forwarded to get_class_string for class names.
        show_class: draw the class label of each kept detection. Labels need
            class indices, which are only available when `boxes` is given in
            the per-class list format; otherwise labels are skipped.

    Returns:
        The image with detections drawn, or `im` itself when there are no
        boxes or no box reaches `thresh`.
    """
    # Class indices are only known after converting the per-class format.
    # Initializing here avoids an UnboundLocalError when `boxes` is already
    # an array and show_class is requested.
    classes = None
    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return im

    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)
        color_list = colormap()
        mask_color_id = 0

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (off by default)
        if show_box:
            im = vis_bbox(
                im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]))

        # show class (off by default; needs class indices)
        if show_class and classes is not None:
            class_str = get_class_string(classes[i], score, dataset)
            im = vis_class(im, (bbox[0], bbox[1] - 2), class_str)

        # show mask
        if segms is not None and len(segms) > i:
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1
            im = vis_mask(im, masks[..., i], color_mask)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            im = vis_keypoints(im, keypoints[i], kp_thresh)

    return im
masks = mask_util.decode(segms) color_list = colormap(rgb=True) / 255 kp_lines = kp_connections(dataset_keypoints) cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)] fig = plt.figure(frameon=False) fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.axis('off') fig.add_axes(ax) ax.imshow(im) if boxes is None: sorted_inds = [] # avoid crash when 'boxes' is None else: # Display in largest to smallest order to reduce occlusion areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) sorted_inds = np.argsort(-areas) mask_color_id = 0 for i in sorted_inds: bbox = boxes[i, :4] score = boxes[i, -1] if score < thresh: continue # show box (off by default) ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='g', linewidth=0.5, alpha=box_alpha)) if show_class: ax.text( bbox[0], bbox[1] - 2, get_class_string(classes[i], score, dataset), fontsize=3, family='serif', bbox=dict( facecolor='g', alpha=0.4, pad=0, edgecolor='none'), color='white') # show mask if segms is not None and len(segms) > i: img = np.ones(im.shape) color_mask = color_list[mask_color_id % len(color_list), 0:3] mask_color_id += 1 w_ratio = .4 for c in range(3): color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio for c in range(3): img[:, :, c] = color_mask[c] e = masks[:, :, i] contour = cv2.findContours( e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2] for c in contour: polygon = Polygon( c.reshape((-1, 2)), fill=True, facecolor=color_mask, edgecolor='w', linewidth=1.2, alpha=0.5) ax.add_patch(polygon) # show keypoints if keypoints is not None and len(keypoints) > i: kps = keypoints[i] plt.autoscale(False) for l in range(len(kp_lines)): i1 = kp_lines[l][0] i2 = kp_lines[l][1] if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: x = [kps[0, i1], kps[0, i2]] y = [kps[1, i1], kps[1, i2]] line = plt.plot(x, y) plt.setp(line, color=colors[l], 
linewidth=1.0, alpha=0.7) if kps[2, i1] > kp_thresh: plt.plot( kps[0, i1], kps[1, i1], '.', color=colors[l], markersize=3.0, alpha=0.7) if kps[2, i2] > kp_thresh: plt.plot( kps[0, i2], kps[1, i2], '.', color=colors[l], markersize=3.0, alpha=0.7) # add mid shoulder / mid hip for better visualization mid_shoulder = ( kps[:2, dataset_keypoints.index('right_shoulder')] + kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0 sc_mid_shoulder = np.minimum( kps[2, dataset_keypoints.index('right_shoulder')], kps[2, dataset_keypoints.index('left_shoulder')]) mid_hip = ( kps[:2, dataset_keypoints.index('right_hip')] + kps[:2, dataset_keypoints.index('left_hip')]) / 2.0 sc_mid_hip = np.minimum( kps[2, dataset_keypoints.index('right_hip')], kps[2, dataset_keypoints.index('left_hip')]) if (sc_mid_shoulder > kp_thresh and kps[2, dataset_keypoints.index('nose')] > kp_thresh): x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]] y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]] line = plt.plot(x, y) plt.setp( line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7) if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh: x = [mid_shoulder[0], mid_hip[0]] y = [mid_shoulder[1], mid_hip[1]] line = plt.plot(x, y) plt.setp( line, color=colors[len(kp_lines) + 1], linewidth=1.0, alpha=0.7) output_name = os.path.basename(im_name) + '.' 
+ ext fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi) plt.close('all') ================================================ FILE: docker/Dockerfile ================================================ # Use Caffe2 image as parent image FROM caffe2/caffe2:snapshot-py2-cuda9.0-cudnn7-ubuntu16.04 RUN mv /usr/local/caffe2 /usr/local/caffe2_build ENV Caffe2_DIR /usr/local/caffe2_build ENV PYTHONPATH /usr/local/caffe2_build:${PYTHONPATH} ENV LD_LIBRARY_PATH /usr/local/caffe2_build/lib:${LD_LIBRARY_PATH} # Clone the Detectron repository RUN git clone https://github.com/facebookresearch/detectron /detectron # Install Python dependencies RUN pip install -r /detectron/requirements.txt # Install the COCO API RUN git clone https://github.com/cocodataset/cocoapi.git /cocoapi WORKDIR /cocoapi/PythonAPI RUN make install # Go to Detectron root WORKDIR /detectron # Set up Python modules RUN make # [Optional] Build custom ops RUN make ops ================================================ FILE: projects/GN/README.md ================================================ # Group Normalization for Mask R-CNN
## Introduction This file provides Mask R-CNN baseline results and models trained with [Group Normalization](https://arxiv.org/abs/1803.08494): ``` @article{GroupNorm2018, title={Group Normalization}, author={Yuxin Wu and Kaiming He}, journal={arXiv:1803.08494}, year={2018} } ``` **Note:** This code uses the GroupNorm op implemented in CUDA, included in the Caffe2 repo. At the time of writing, Caffe2 is being merged into PyTorch, and the GroupNorm op is located [here](https://github.com/pytorch/pytorch/blob/master/caffe2/operators/group_norm_op.cu). Make sure your Caffe2 is up to date. ## Pretrained Models with GN These models are trained in Caffe2 on the standard ImageNet-1k dataset, using GroupNorm with 32 groups (G=32). - [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl): ResNet-50 with GN, 24.0\% top-1 error (center-crop). - [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl): ResNet-101 with GN, 22.6\% top-1 error (center-crop). ## Results ### Baselines with BN
         case           type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
model id
R-50-FPN, BN* Mask R-CNN 2x 2 8.6 0.897 44.9 0.099 + 0.018 38.6 34.5 35859007
R-101-FPN, BN* Mask R-CNN 2x 2 10.2 0.993 49.7 0.126 + 0.017 40.9 36.4 35861858
**Notes:** - This table is copied from [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines). - BN* means that BatchNorm (BN) is used for pre-training and is frozen and turned into a per-channel linear layer when fine-tuning. This is the default of Faster/Mask R-CNN and Detectron. ### Mask R-CNN with GN #### Standard Mask R-CNN recipe
         case           type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
model id download
links
R-50-FPN, GN Mask R-CNN 2x 2 10.5 1.017 50.8 0.146 + 0.017 40.3 35.7 48616381 model  |  boxes  |  masks
R-101-FPN, GN Mask R-CNN 2x 2 12.4 1.151 57.5 0.180 + 0.015 41.8 36.8 48616724 model  |  boxes  |  masks
**Notes:** - GN is applied to: (i) ResNet layers inherited from pre-training, (ii) the FPN-specific layers, (iii) the RoI bbox head, and (iv) the RoI mask head. - These GN models use a 4conv+1fc RoI box head. The BN* counterpart with this head performs similarly to the default 2fc head: using this codebase, R-50-FPN BN\* with 4conv+1fc has 38.8/34.4 box/mask AP. - 2x is the default schedule (180k) in Detectron. #### Longer training schedule
         case           type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
model id download
links
R-50-FPN, GN Mask R-CNN 3x 2 10.5 1.033 77.4 0.145 + 0.015 40.8 36.1 48734751 model  |  boxes  |  masks
R-101-FPN, GN Mask R-CNN 3x 2 12.4 1.171 87.9 0.180 + 0.014 42.3 37.2 48734779 model  |  boxes  |  masks
**Notes:** - 3x is a longer schedule (270k). GN can improve further when using the longer schedule, but its BN* counterpart remains similar (R-50-FPN BN\*: 38.9/34.3) with the longer schedule. - These models are trained **without** any scale augmentation that can further [improve results](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#mask-r-cnn-with-bells--whistles). ### Explorations #### Training Mask R-CNN from scratch GN makes it possible to train Mask R-CNN *from scratch* without ImageNet pre-training, despite the small batch size.
         case           type lr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
model id
R-50-FPN, GN, scratch Mask R-CNN 3x 2 10.8 1.087 81.5 0.140 + 0.019 39.5 35.2 56421872
R-101-FPN, GN, scratch Mask R-CNN 3x 2 12.7 1.243 93.2 0.177 + 0.019 41.0 36.4 56421911
**Notes:** - To reproduce these results, see the config yaml files starting with ```scratch ```. - These are results using ```freeze_at=0```. See this [commit](https://github.com/facebookresearch/Detectron/commit/f8ffc87ca442d8f6bd2b9aad11029b5db56d7260) about the related issue.  
R-50-FPN, GN, scratch Mask R-CNN 3x 2 10.5 0.990 74.3 0.146 + 0.020 36.2 32.5 49025460
R-101-FPN, GN, scratch Mask R-CNN 3x 2 12.4 1.124 84.3 0.180 + 0.019 37.5 33.3 49024951
**Notes:** - These are early results that followed the default training using ```freeze_at=2```. This means the layers of conv1 and res2 were simply random weights in the case of training from-scratch. See this [commit](https://github.com/facebookresearch/Detectron/commit/f8ffc87ca442d8f6bd2b9aad11029b5db56d7260) about the related issue. ================================================ FILE: requirements.txt ================================================ numpy>=1.13 pyyaml==3.12 matplotlib opencv-python>=3.2 setuptools Cython mock scipy six future protobuf ================================================ FILE: setup.py ================================================ # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
_NP_INCLUDE_DIRS = np.get_include()

# Cython modules under detectron/utils; each is built from <name>.pyx.
_CYTHON_MODULES = ('cython_bbox', 'cython_nms')

# Extension modules
ext_modules = [
    Extension(
        name='detectron.utils.' + module_name,
        sources=['detectron/utils/' + module_name + '.pyx'],
        extra_compile_args=['-Wno-cpp'],
        include_dirs=[_NP_INCLUDE_DIRS]
    )
    for module_name in _CYTHON_MODULES
]

setup(
    name='Detectron',
    packages=['detectron'],
    ext_modules=cythonize(ext_modules)
)
def parse_args():
    """Parse the command-line options of the dataset conversion script.

    Returns:
        argparse.Namespace with `dataset`, `outdir` and `datadir` (each a
        string, or None when not given).

    When the script is run without any argument, the help text is printed
    and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Convert dataset')
    # The help text lists the values the script's entry point dispatches on.
    parser.add_argument(
        '--dataset', help="cocostuff, cityscapes_instance_only",
        default=None, type=str)
    parser.add_argument(
        '--outdir', help="output dir for json files", default=None, type=str)
    parser.add_argument(
        '--datadir', help="data dir for annotations to be converted",
        default=None, type=str)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()
def convert_cityscapes_instance_only(
        data_dir, out_dir):
    """Convert from cityscapes format to COCO instance seg format - polygons

    Args:
        data_dir: root directory that contains the Cityscapes annotation
            folders listed in `ann_dirs`.
        out_dir: directory that receives one
            'instancesonly_filtered_<set>.json' file per converted set.

    Only the classes in `category_instancesonly` are kept. Category ids are
    assigned in order of first appearance, starting at 1.
    """
    # `sets` and `ann_dirs` are paired by position; keep them aligned when
    # enabling additional entries.
    sets = [
        'gtFine_val',
        # 'gtFine_train',
        # 'gtFine_test',

        # 'gtCoarse_train',
        # 'gtCoarse_val',
        # 'gtCoarse_train_extra'
    ]
    ann_dirs = [
        'gtFine_trainvaltest/gtFine/val',
        # 'gtFine_trainvaltest/gtFine/train',
        # 'gtFine_trainvaltest/gtFine/test',

        # 'gtCoarse/train',
        # 'gtCoarse/val',
        # 'gtCoarse/train_extra'
    ]
    json_name = 'instancesonly_filtered_%s.json'
    ends_in = '%s_polygons.json'
    img_id = 0
    ann_id = 0
    cat_id = 1
    category_dict = {}

    category_instancesonly = [
        'person',
        'rider',
        'car',
        'truck',
        'bus',
        'train',
        'motorcycle',
        'bicycle',
    ]

    for data_set, ann_dir in zip(sets, ann_dirs):
        print('Starting %s' % data_set)
        ann_dict = {}
        images = []
        annotations = []
        ann_dir = os.path.join(data_dir, ann_dir)
        # 'gtFine' / 'gtCoarse' prefix shared by all file names of this set.
        set_prefix = data_set.split('_')[0]
        polygon_suffix = ends_in % set_prefix
        for root, _, files in os.walk(ann_dir):
            for filename in files:
                if not filename.endswith(polygon_suffix):
                    continue
                if len(images) % 50 == 0:
                    print("Processed %s images, %s annotations" % (
                        len(images), len(annotations)))
                # Close the annotation file as soon as it has been parsed.
                with open(os.path.join(root, filename)) as ann_file:
                    json_ann = json.load(ann_file)
                file_stem = filename[:-len(polygon_suffix)]
                image = {}
                image['id'] = img_id
                img_id += 1

                image['width'] = json_ann['imgWidth']
                image['height'] = json_ann['imgHeight']
                image['file_name'] = file_stem + 'leftImg8bit.png'
                image['seg_file_name'] = file_stem + \
                    '%s_instanceIds.png' % set_prefix
                images.append(image)

                fullname = os.path.join(root, image['seg_file_name'])
                objects = cs.instances2dict_with_polygons(
                    [fullname], verbose=False)[fullname]

                for object_cls in objects:
                    if object_cls not in category_instancesonly:
                        continue  # skip non-instance categories

                    for obj in objects[object_cls]:
                        if obj['contours'] == []:
                            print('Warning: empty contours.')
                            continue  # skip instances without any contour

                        len_p = [len(p) for p in obj['contours']]
                        if min(len_p) <= 4:
                            print('Warning: invalid contours.')
                            continue  # skip instances with a too-short contour

                        ann = {}
                        ann['id'] = ann_id
                        ann_id += 1
                        ann['image_id'] = image['id']
                        ann['segmentation'] = obj['contours']

                        if object_cls not in category_dict:
                            category_dict[object_cls] = cat_id
                            cat_id += 1
                        ann['category_id'] = category_dict[object_cls]
                        ann['iscrowd'] = 0
                        ann['area'] = obj['pixelCount']
                        ann['bbox'] = bboxs_util.xyxy_to_xywh(
                            segms_util.polys_to_boxes(
                                [ann['segmentation']])).tolist()[0]

                        annotations.append(ann)

        ann_dict['images'] = images
        categories = [{"id": category_dict[name], "name": name} for name in
                      category_dict]
        ann_dict['categories'] = categories
        ann_dict['annotations'] = annotations
        print("Num categories: %s" % len(categories))
        print("Num images: %s" % len(images))
        print("Num annotations: %s" % len(annotations))
        # json.dumps returns text, so the file must be opened in text mode;
        # writing it to a 'wb' handle raises TypeError on Python 3.
        with open(os.path.join(out_dir, json_name % data_set), 'w') as outfile:
            outfile.write(json.dumps(ann_dict))
def parse_args():
    """Parse the command-line options of the COCO-to-Cityscapes converter.

    Returns an argparse.Namespace with `coco_model_file_name`,
    `convert_func` (defaults to 'cityscapes_to_coco') and `out_file_name`.
    Running the script without any argument prints the help text and exits
    with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Convert a COCO pre-trained model for use with Cityscapes')
    # (flag, destination attribute, help text, default value)
    option_specs = (
        ('--coco_model', 'coco_model_file_name',
         'Pretrained network weights file path', None),
        ('--convert_func', 'convert_func',
         'Blob conversion function', 'cityscapes_to_coco'),
        ('--output', 'out_file_name',
         'Output file path', None),
    )
    for flag, dest, help_text, default in option_specs:
        parser.add_argument(
            flag, dest=dest, help=help_text, default=default, type=str)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()
def remove_momentum(model_dict):
    """Delete every blob whose name ends with '_momentum', in place.

    Args:
        model_dict: dict with a 'blobs' entry mapping blob names to values.
    """
    blobs = model_dict['blobs']
    # Collect the names first: deleting entries while iterating the live
    # keys() view raises RuntimeError on Python 3.
    momentum_names = [name for name in blobs if name.endswith('_momentum')]
    for name in momentum_names:
        del blobs[name]
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Script to convert the model (.yaml and .pkl) trained by train_net to a standard Caffe2 model in pb format (model.pb and model_init.pb). The converted model is good for production usage, as it could run independently and efficiently on CPU, GPU and mobile without depending on the detectron codebase. Please see Caffe2 tutorial ( https://caffe2.ai/docs/tutorial-loading-pre-trained-models.html) for loading the converted model, and run_model_pb() for running the model for inference. 
""" from __future__ import absolute_import, division, print_function, unicode_literals import argparse import copy import os import pprint import sys import caffe2.python.utils as putils import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) import detectron.core.test_engine as test_engine import detectron.utils.blob as blob_utils import detectron.utils.c2 as c2_utils import detectron.utils.model_convert_utils as mutils import detectron.utils.vis as vis_utils import numpy as np from caffe2.caffe2.fb.predictor import predictor_exporter, predictor_py_utils from caffe2.proto import caffe2_pb2 from caffe2.python import core, workspace from caffe2.python.predictor_constants import predictor_constants from detectron.core.config import ( assert_and_infer_cfg, cfg, merge_cfg_from_file, merge_cfg_from_list, ) from detectron.modeling import generate_anchors from detectron.utils.logging import setup_logging from detectron.utils.model_convert_utils import convert_op_in_proto, op_filter c2_utils.import_contrib_ops() c2_utils.import_detectron_ops() # OpenCL may be enabled by default in OpenCV3; disable it because it's not # thread safe and causes unwanted GPU memory allocations. 
cv2.ocl.setUseOpenCL(False) logger = setup_logging(__name__) def parse_args(): parser = argparse.ArgumentParser( description="Convert a trained network to pb format" ) parser.add_argument( "--cfg", dest="cfg_file", help="optional config file", default=None, type=str ) parser.add_argument( "--net_name", dest="net_name", help="optional name for the net", default="detectron", type=str, ) parser.add_argument( "--out_dir", dest="out_dir", help="output dir", default=None, type=str ) parser.add_argument( "--test_img", dest="test_img", help="optional test image, used to verify the model conversion", default=None, type=str, ) parser.add_argument( "--fuse_af", dest="fuse_af", help="1 to fuse_af", default=1, type=int ) parser.add_argument( "--device", dest="device", help="Device to run the model on", choices=["cpu", "gpu"], default="cpu", type=str, ) parser.add_argument( "--net_execution_type", dest="net_execution_type", help="caffe2 net execution type", choices=["simple", "dag"], default="simple", type=str, ) parser.add_argument( "--use_nnpack", dest="use_nnpack", help="Use nnpack for conv", default=1, type=int, ) parser.add_argument( "--logdb", dest="logdb", help="output to logfiledb instead of pb files", default=0, type=int, ) parser.add_argument( "opts", help="See detectron/core/config.py for all options", default=None, nargs=argparse.REMAINDER, ) if len(sys.argv) == 1: parser.print_help() sys.exit(1) ret = parser.parse_args() ret.out_dir = os.path.abspath(ret.out_dir) if ret.device == "gpu" and ret.use_nnpack: logger.warn("Should not use mobile engine for gpu model.") ret.use_nnpack = 0 return ret def unscope_name(name): return c2_utils.UnscopeName(name) def reset_names(names): for i in range(len(names)): names[i] = unscope_name(names[i]) def convert_collect_and_distribute( op, blobs, roi_canonical_scale, roi_canonical_level, roi_max_level, roi_min_level, rpn_max_level, rpn_min_level, rpn_post_nms_topN, ): print( "Converting CollectAndDistributeFpnRpnProposals" " Python 
-> C++:\n{}".format(op) ) assert op.name.startswith( "CollectAndDistributeFpnRpnProposalsOp" ), "Not valid CollectAndDistributeFpnRpnProposalsOp" inputs = [x for x in op.input] ret = core.CreateOperator( "CollectAndDistributeFpnRpnProposals", inputs, list(op.output), roi_canonical_scale=roi_canonical_scale, roi_canonical_level=roi_canonical_level, roi_max_level=roi_max_level, roi_min_level=roi_min_level, rpn_max_level=rpn_max_level, rpn_min_level=rpn_min_level, rpn_post_nms_topN=rpn_post_nms_topN, ) return ret def convert_gen_proposals( op, blobs, rpn_pre_nms_topN, rpn_post_nms_topN, rpn_nms_thresh, rpn_min_size ): print("Converting GenerateProposals Python -> C++:\n{}".format(op)) assert op.name.startswith("GenerateProposalsOp"), "Not valid GenerateProposalsOp" spatial_scale = mutils.get_op_arg_valf(op, "spatial_scale", None) assert spatial_scale is not None lvl = int(op.input[0][-1]) if op.input[0][-1].isdigit() else None inputs = [x for x in op.input] anchor_name = "anchor{}".format(lvl) if lvl else "anchor" inputs.append(anchor_name) anchor_sizes = ( (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.0 ** (lvl - cfg.FPN.RPN_MIN_LEVEL),) if lvl else cfg.RPN.SIZES ) blobs[anchor_name] = get_anchors(spatial_scale, anchor_sizes) print("anchors {}".format(blobs[anchor_name])) ret = core.CreateOperator( "GenerateProposals", inputs, list(op.output), spatial_scale=spatial_scale, pre_nms_topN=rpn_pre_nms_topN, post_nms_topN=rpn_post_nms_topN, nms_thresh=rpn_nms_thresh, min_size=rpn_min_size, correct_transform_coords=True, ) return ret, anchor_name def get_anchors(spatial_scale, anchor_sizes): anchors = generate_anchors.generate_anchors( stride=1.0 / spatial_scale, sizes=anchor_sizes, aspect_ratios=cfg.RPN.ASPECT_RATIOS, ).astype(np.float32) return anchors def reset_blob_names(blobs): ret = {unscope_name(x): blobs[x] for x in blobs} blobs.clear() blobs.update(ret) def convert_net(args, net, blobs): @op_filter() def convert_op_name(op): if args.device != "gpu": if op.engine != 
"DEPTHWISE_3x3": op.engine = "" op.device_option.CopyFrom(caffe2_pb2.DeviceOption()) reset_names(op.input) reset_names(op.output) return [op] @op_filter(type="Python") def convert_python(op): if op.name.startswith("GenerateProposalsOp"): gen_proposals_op, ext_input = convert_gen_proposals( op, blobs, rpn_min_size=float(cfg.TEST.RPN_MIN_SIZE), rpn_post_nms_topN=cfg.TEST.RPN_POST_NMS_TOP_N, rpn_pre_nms_topN=cfg.TEST.RPN_PRE_NMS_TOP_N, rpn_nms_thresh=cfg.TEST.RPN_NMS_THRESH, ) net.external_input.extend([ext_input]) return [gen_proposals_op] elif op.name.startswith("CollectAndDistributeFpnRpnProposalsOp"): collect_dist_op = convert_collect_and_distribute( op, blobs, roi_canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE, roi_canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL, roi_max_level=cfg.FPN.ROI_MAX_LEVEL, roi_min_level=cfg.FPN.ROI_MIN_LEVEL, rpn_max_level=cfg.FPN.RPN_MAX_LEVEL, rpn_min_level=cfg.FPN.RPN_MIN_LEVEL, rpn_post_nms_topN=cfg.TEST.RPN_POST_NMS_TOP_N, ) return [collect_dist_op] else: raise ValueError("Failed to convert Python op {}".format(op.name)) # Only convert UpsampleNearest to ResizeNearest when converting to pb so that the existing models is unchanged # https://github.com/facebookresearch/Detectron/pull/372#issuecomment-410248561 @op_filter(type="UpsampleNearest") def convert_upsample_nearest(op): for arg in op.arg: if arg.name == "scale": scale = arg.i break else: raise KeyError('No attribute "scale" in UpsampleNearest op') resize_nearest_op = core.CreateOperator( "ResizeNearest", list(op.input), list(op.output), name=op.name, width_scale=float(scale), height_scale=float(scale), ) return resize_nearest_op @op_filter() def convert_rpn_rois(op): for j in range(len(op.input)): if op.input[j] == "rois": print( "Converting op {} input name: rois -> rpn_rois:\n{}".format( op.type, op ) ) op.input[j] = "rpn_rois" for j in range(len(op.output)): if op.output[j] == "rois": print( "Converting op {} output name: rois -> rpn_rois:\n{}".format( op.type, op ) ) 
op.output[j] = "rpn_rois" return [op] @op_filter(type_in=["StopGradient", "Alias"]) def convert_remove_op(op): print("Removing op {}:\n{}".format(op.type, op)) return [] # We want to apply to all operators, including converted # so run separately convert_op_in_proto(net, convert_remove_op) convert_op_in_proto(net, convert_upsample_nearest) convert_op_in_proto(net, convert_python) convert_op_in_proto(net, convert_op_name) convert_op_in_proto(net, convert_rpn_rois) reset_names(net.external_input) reset_names(net.external_output) reset_blob_names(blobs) def add_bbox_ops(args, net, blobs): new_ops = [] new_external_outputs = [] # Operators for bboxes op_box = core.CreateOperator( "BBoxTransform", ["rpn_rois", "bbox_pred", "im_info"], ["pred_bbox"], weights=cfg.MODEL.BBOX_REG_WEIGHTS, apply_scale=False, correct_transform_coords=True, ) new_ops.extend([op_box]) blob_prob = "cls_prob" blob_box = "pred_bbox" op_nms = core.CreateOperator( "BoxWithNMSLimit", [blob_prob, blob_box], ["score_nms", "bbox_nms", "class_nms"], arg=[ putils.MakeArgument("score_thresh", cfg.TEST.SCORE_THRESH), putils.MakeArgument("nms", cfg.TEST.NMS), putils.MakeArgument("detections_per_im", cfg.TEST.DETECTIONS_PER_IM), putils.MakeArgument("soft_nms_enabled", cfg.TEST.SOFT_NMS.ENABLED), putils.MakeArgument("soft_nms_method", cfg.TEST.SOFT_NMS.METHOD), putils.MakeArgument("soft_nms_sigma", cfg.TEST.SOFT_NMS.SIGMA), ], ) new_ops.extend([op_nms]) new_external_outputs.extend(["score_nms", "bbox_nms", "class_nms"]) net.Proto().op.extend(new_ops) net.Proto().external_output.extend(new_external_outputs) def convert_model_gpu(args, net, init_net): assert args.device == "gpu" ret_net = copy.deepcopy(net) ret_init_net = copy.deepcopy(init_net) cdo_cuda = mutils.get_device_option_cuda() cdo_cpu = mutils.get_device_option_cpu() CPU_OPS = [ ["CollectAndDistributeFpnRpnProposals", None], ["GenerateProposals", None], ["BBoxTransform", None], ["BoxWithNMSLimit", None], ] CPU_BLOBS = ["im_info", "anchor"] 
@op_filter() def convert_op_gpu(op): for x in CPU_OPS: if mutils.filter_op(op, type=x[0], inputs=x[1]): return None op.device_option.CopyFrom(cdo_cuda) return [op] @op_filter() def convert_init_op_gpu(op): if op.output[0] in CPU_BLOBS: op.device_option.CopyFrom(cdo_cpu) else: op.device_option.CopyFrom(cdo_cuda) return [op] convert_op_in_proto(ret_init_net.Proto(), convert_init_op_gpu) convert_op_in_proto(ret_net.Proto(), convert_op_gpu) ret = core.InjectDeviceCopiesAmongNets([ret_init_net, ret_net]) return [ret[0][1], ret[0][0]] def gen_init_net(net, blobs, empty_blobs): blobs = copy.deepcopy(blobs) for x in empty_blobs: blobs[x] = np.array([], dtype=np.float32) init_net = mutils.gen_init_net_from_blobs(blobs, net.external_inputs) init_net = core.Net(init_net) return init_net def _save_image_graphs(args, all_net, all_init_net): print("Saving model graph...") mutils.save_graph( all_net.Proto(), os.path.join(args.out_dir, "model_def.png"), op_only=False ) print("Model def image saved to {}.".format(args.out_dir)) def _save_models(all_net, all_init_net, args): print("Writing converted model to {}...".format(args.out_dir)) fname = "model" if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) with open(os.path.join(args.out_dir, fname + ".pb"), "wb") as f: f.write(all_net.Proto().SerializeToString()) with open(os.path.join(args.out_dir, fname + ".pbtxt"), "wb") as f: f.write(str(all_net.Proto())) with open(os.path.join(args.out_dir, fname + "_init.pb"), "wb") as f: f.write(all_init_net.Proto().SerializeToString()) _save_image_graphs(args, all_net, all_init_net) def load_model(args): model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS) blobs = mutils.get_ws_blobs() return model, blobs def _get_result_blobs(check_blobs): ret = {} for x in check_blobs: sn = core.ScopedName(x) if workspace.HasBlob(sn): ret[x] = workspace.FetchBlob(sn) else: ret[x] = None return ret def _sort_results(boxes, segms, keypoints, classes): indices = np.argsort(boxes[:, 
-1])[::-1] if boxes is not None: boxes = boxes[indices, :] if segms is not None: segms = [segms[x] for x in indices] if keypoints is not None: keypoints = [keypoints[x] for x in indices] if classes is not None: if isinstance(classes, list): classes = [classes[x] for x in indices] else: classes = classes[indices] return boxes, segms, keypoints, classes def run_model_cfg(args, im, check_blobs): workspace.ResetWorkspace() model, _ = load_model(args) with c2_utils.NamedCudaScope(0): cls_boxes, cls_segms, cls_keyps = test_engine.im_detect_all( model, im, None, None ) boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format( cls_boxes, cls_segms, cls_keyps ) # sort the results based on score for comparision boxes, segms, keypoints, classes = _sort_results(boxes, segms, keypoints, classes) # write final results back to workspace def _ornone(res): return np.array(res) if res is not None else np.array([], dtype=np.float32) with c2_utils.NamedCudaScope(0): workspace.FeedBlob(core.ScopedName("result_boxes"), _ornone(boxes)) workspace.FeedBlob(core.ScopedName("result_segms"), _ornone(segms)) workspace.FeedBlob(core.ScopedName("result_keypoints"), _ornone(keypoints)) workspace.FeedBlob(core.ScopedName("result_classids"), _ornone(classes)) # get result blobs with c2_utils.NamedCudaScope(0): ret = _get_result_blobs(check_blobs) return ret def _prepare_blobs(im, pixel_means, target_size, max_size): """ Reference: blob.prep_im_for_blob() """ im = im.astype(np.float32, copy=False) im -= pixel_means im_shape = im.shape im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) im_scale = float(target_size) / float(im_size_min) if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) im = cv2.resize( im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR ) # Reuse code in blob_utils and fit FPN blob = blob_utils.im_list_to_blob([im]) blobs = {} blobs["data"] = blob blobs["im_info"] = np.array( 
[[blob.shape[2], blob.shape[3], im_scale]], dtype=np.float32 ) return blobs def run_model_pb(args, net, init_net, im, check_blobs): workspace.ResetWorkspace() workspace.RunNetOnce(init_net) mutils.create_input_blobs_for_net(net.Proto()) workspace.CreateNet(net) # input_blobs, _ = core_test._get_blobs(im, None) input_blobs = _prepare_blobs(im, cfg.PIXEL_MEANS, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) gpu_blobs = [] if args.device == "gpu": gpu_blobs = ["data"] for k, v in input_blobs.items(): workspace.FeedBlob( core.ScopedName(k), v, mutils.get_device_option_cuda() if k in gpu_blobs else mutils.get_device_option_cpu(), ) try: workspace.RunNet(net) scores = workspace.FetchBlob("score_nms") classids = workspace.FetchBlob("class_nms") boxes = workspace.FetchBlob("bbox_nms") except Exception as e: print("Running pb model failed.\n{}".format(e)) # may not detect anything at all R = 0 scores = np.zeros((R,), dtype=np.float32) boxes = np.zeros((R, 4), dtype=np.float32) classids = np.zeros((R,), dtype=np.float32) boxes = np.column_stack((boxes, scores)) # sort the results based on score for comparision boxes, _, _, classids = _sort_results(boxes, None, None, classids) # write final result back to workspace workspace.FeedBlob("result_boxes", boxes) workspace.FeedBlob("result_classids", classids) ret = _get_result_blobs(check_blobs) return ret def verify_model(args, model_pb, test_img_file): check_blobs = ["result_boxes", "result_classids"] # result print("Loading test file {}...".format(test_img_file)) test_img = cv2.imread(test_img_file) assert test_img is not None def _run_cfg_func(im, blobs): return run_model_cfg(args, im, check_blobs) def _run_pb_func(im, blobs): return run_model_pb(args, model_pb[0], model_pb[1], im, check_blobs) print("Checking models...") assert mutils.compare_model(_run_cfg_func, _run_pb_func, test_img, check_blobs) def _export_to_logfiledb(args, net, init_net, inputs, out_file, extra_out_tensors=None): out_tensors = list(net.Proto().external_output) if 
extra_out_tensors is not None: out_tensors += extra_out_tensors params = list(set(net.Proto().external_input) - set(inputs)) net_type = None predictor_export_meta = predictor_exporter.PredictorExportMeta( predict_net=net, parameters=params, inputs=inputs, outputs=out_tensors, net_type=net_type, ) logger.info("Exporting Caffe2 model to {}".format(out_file)) predictor_exporter.save_to_db( db_type="log_file_db", db_destination=out_file, predictor_export_meta=predictor_export_meta, ) def main(): workspace.GlobalInit(["caffe2", "--caffe2_log_level=0"]) args = parse_args() logger.info("Called with args:") logger.info(args) if args.cfg_file is not None: merge_cfg_from_file(args.cfg_file) if args.opts is not None: merge_cfg_from_list(args.opts) cfg.NUM_GPUS = 1 assert_and_infer_cfg() logger.info("Converting model with config:") logger.info(pprint.pformat(cfg)) # script will stop when it can't find an operator rather # than stopping based on these flags # # assert not cfg.MODEL.KEYPOINTS_ON, "Keypoint model not supported." # assert not cfg.MODEL.MASK_ON, "Mask model not supported." # assert not cfg.FPN.FPN_ON, "FPN not supported." # assert not cfg.RETINANET.RETINANET_ON, "RetinaNet model not supported." 
# load model from cfg model, blobs = load_model(args) net = core.Net("") net.Proto().op.extend(copy.deepcopy(model.net.Proto().op)) net.Proto().external_input.extend(copy.deepcopy(model.net.Proto().external_input)) net.Proto().external_output.extend(copy.deepcopy(model.net.Proto().external_output)) net.Proto().type = args.net_execution_type net.Proto().num_workers = 1 if args.net_execution_type == "simple" else 4 # Reset the device_option, change to unscope name and replace python operators convert_net(args, net.Proto(), blobs) # add operators for bbox add_bbox_ops(args, net, blobs) if args.fuse_af: print("Fusing affine channel...") net, blobs = mutils.fuse_net_affine(net, blobs) if args.use_nnpack: mutils.update_mobile_engines(net.Proto()) # generate init net empty_blobs = ["data", "im_info"] init_net = gen_init_net(net, blobs, empty_blobs) if args.device == "gpu": [net, init_net] = convert_model_gpu(args, net, init_net) net.Proto().name = args.net_name init_net.Proto().name = args.net_name + "_init" if args.test_img is not None: verify_model(args, [net, init_net], args.test_img) if args.logdb == 1: output_file = os.path.join(args.out_dir, "model.logfiledb") _export_to_logfiledb(args, net, init_net, empty_blobs, output_file) else: _save_models(net, init_net, args) if __name__ == "__main__": main() ================================================ FILE: tools/convert_selective_search.py ================================================ #!/usr/bin/env python # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Script to convert Selective Search proposal boxes into the Detectron proposal file format. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import numpy as np import scipy.io as sio import sys from detectron.datasets.json_dataset import JsonDataset from detectron.utils.io import save_object if __name__ == '__main__': dataset_name = sys.argv[1] file_in = sys.argv[2] file_out = sys.argv[3] ds = JsonDataset(dataset_name) roidb = ds.get_roidb() raw_data = sio.loadmat(file_in)['boxes'].ravel() assert raw_data.shape[0] == len(roidb) boxes = [] scores = [] ids = [] for i in range(raw_data.shape[0]): if i % 1000 == 0: print('{}/{}'.format(i + 1, len(roidb))) # selective search boxes are 1-indexed and (y1, x1, y2, x2) i_boxes = raw_data[i][:, (1, 0, 3, 2)] - 1 boxes.append(i_boxes.astype(np.float32)) scores.append(np.zeros((i_boxes.shape[0]), dtype=np.float32)) ids.append(roidb[i]['id']) save_object(dict(boxes=boxes, scores=scores, indexes=ids), file_out) ================================================ FILE: tools/generate_testdev_from_test.py ================================================ #!/usr/bin/env python # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################## """Given a full set of results (boxes, masks, or keypoints) on the 2017 COCO test set, this script extracts the results subset that corresponds to 2017 test-dev. The test-dev subset can then be submitted to the COCO evaluation server. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import argparse import json import os import sys from detectron.datasets.dataset_catalog import get_ann_fn from detectron.utils.timer import Timer def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( '--json', dest='json_file', help='detections json file', default='', type=str) parser.add_argument( '--output-dir', dest='output_dir', help='output directory', default='/tmp', type=str) if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() return args def convert(json_file, output_dir): print('Reading: {}'.format(json_file)) with open(json_file, 'r') as fid: dt = json.load(fid) print('done!') test_image_info = get_ann_fn('coco_2017_test') with open(test_image_info, 'r') as fid: info_test = json.load(fid) image_test = info_test['images'] image_test_id = [i['id'] for i in image_test] print('{} has {} images'.format(test_image_info, len(image_test_id))) test_dev_image_info = get_ann_fn('coco_2017_test-dev') with open(test_dev_image_info, 'r') as fid: info_testdev = json.load(fid) image_testdev = info_testdev['images'] image_testdev_id = [i['id'] for i in image_testdev] print('{} has {} images'.format(test_dev_image_info, len(image_testdev_id))) dt_testdev = [] print('Filtering test-dev from test...') t = Timer() t.tic() for i in range(len(dt)): if i % 1000 == 0: print('{}/{}'.format(i, len(dt))) if dt[i]['image_id'] in image_testdev_id: dt_testdev.append(dt[i]) print('Done filtering ({:2}s)!'.format(t.toc())) filename, file_extension = 
os.path.splitext(os.path.basename(json_file)) filename = filename + '_test-dev' filename = os.path.join(output_dir, filename + file_extension) with open(filename, 'w') as fid: info_test = json.dump(dt_testdev, fid) print('Done writing: {}!'.format(filename)) if __name__ == '__main__': opts = parse_args() convert(opts.json_file, opts.output_dir) ================================================ FILE: tools/infer.py ================================================ #!/usr/bin/env python # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Perform inference on a single image or all images with a certain extension (e.g., .jpg) in a folder. Allows for using a combination of multiple models. For example, one model may be used for RPN, another model for Fast R-CNN style box detection, yet another model to predict masks, and yet another model to predict keypoints. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import argparse import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) import logging import os import sys from caffe2.python import workspace from detectron.core.config import assert_and_infer_cfg from detectron.core.config import cfg from detectron.core.config import load_cfg from detectron.core.config import merge_cfg_from_cfg from detectron.core.config import merge_cfg_from_file from detectron.utils.io import cache_url from detectron.utils.logging import setup_logging import detectron.core.rpn_generator as rpn_engine import detectron.core.test_engine as model_engine import detectron.datasets.dummy_datasets as dummy_datasets import detectron.utils.c2 as c2_utils import detectron.utils.env as envu import detectron.utils.vis as vis_utils c2_utils.import_detectron_ops() # OpenCL may be enabled by default in OpenCV3; disable it because it's not # thread safe and causes unwanted GPU memory allocations. cv2.ocl.setUseOpenCL(False) # infer.py # --im [path/to/image.jpg] \ # --rpn-model [path/to/rpn/model.pkl] \ # --rpn-cfg [path/to/rpn/config.yaml] \ # --output-dir [path/to/output/dir] \ # [model1] [config1] [model2] [config2] ... 
def parse_args():
    """Parse the command line of the multi-model inference script.

    Prints the usage text and exits with status 1 when the script is invoked
    without any argument.

    Returns:
        argparse.Namespace with ``im_file``, ``rpn_pkl``, ``rpn_cfg``,
        ``output_dir`` and ``models_to_run`` (all trailing positional
        arguments, consumed by ``main`` as alternating weights/config
        entries).
    """
    parser = argparse.ArgumentParser(description='Inference on an image')
    parser.add_argument(
        '--im', dest='im_file', help='input image', default=None, type=str
    )
    parser.add_argument(
        '--rpn-pkl',
        dest='rpn_pkl',
        help='rpn model file (pkl)',
        default=None,
        type=str
    )
    parser.add_argument(
        '--rpn-cfg',
        dest='rpn_cfg',
        help='cfg model file (yaml)',
        default=None,
        type=str
    )
    parser.add_argument(
        '--output-dir',
        dest='output_dir',
        help='directory for visualization pdfs (default: /tmp/infer)',
        default='/tmp/infer',
        type=str
    )
    parser.add_argument(
        'models_to_run',
        help=(
            'pairs of models & configs, listed like so: '
            '[pkl1] [yaml1] [pkl2] [yaml2] ...'
        ),
        default=None,
        nargs=argparse.REMAINDER
    )
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


def get_rpn_box_proposals(im, args):
    """Generate box proposals for ``im`` with the RPN model given in ``args``.

    The global ``cfg`` is unlocked, merged with ``args.rpn_cfg`` and forced
    into single-GPU, RPN-only mode before the model in ``args.rpn_pkl`` is
    initialized.

    Args:
        im: image as returned by ``cv2.imread``.
        args: parsed arguments providing ``rpn_cfg`` and ``rpn_pkl``.

    Returns:
        The ``(boxes, scores)`` pair produced by ``rpn_engine.im_proposals``.
    """
    cfg.immutable(False)
    merge_cfg_from_file(args.rpn_cfg)
    cfg.NUM_GPUS = 1
    cfg.MODEL.RPN_ONLY = True
    cfg.TEST.RPN_PRE_NMS_TOP_N = 10000
    cfg.TEST.RPN_POST_NMS_TOP_N = 2000
    assert_and_infer_cfg(cache_urls=False)

    model = model_engine.initialize_model_from_cfg(args.rpn_pkl)
    with c2_utils.NamedCudaScope(0):
        boxes, scores = rpn_engine.im_proposals(model, im)
    return boxes, scores


def main(args):
    """Run every requested model on one image and visualize the result.

    Models are run in the order given in ``args.models_to_run``. Each output
    kind (boxes, segms, keypoints) keeps the value from the last model that
    produced a non-None result for it.

    Args:
        args: namespace produced by ``parse_args`` (and normally validated by
            ``check_args``).

    Raises:
        IOError: if ``args.im_file`` cannot be read as an image.
    """
    logger = logging.getLogger(__name__)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    # Snapshot of the pristine config; re-applied before each model's config.
    cfg_orig = load_cfg(envu.yaml_dump(cfg))
    im = cv2.imread(args.im_file)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Failed to read image \'{}\''.format(args.im_file))

    if args.rpn_pkl is not None:
        proposal_boxes, _proposal_scores = get_rpn_box_proposals(im, args)
        workspace.ResetWorkspace()
    else:
        proposal_boxes = None

    cls_boxes, cls_segms, cls_keyps = None, None, None
    for i in range(0, len(args.models_to_run), 2):
        pkl = args.models_to_run[i]
        yml = args.models_to_run[i + 1]
        cfg.immutable(False)
        merge_cfg_from_cfg(cfg_orig)
        merge_cfg_from_file(yml)
        # An empty weights entry falls back to the weights named in the config.
        weights_file = pkl if len(pkl) > 0 else cfg.TEST.WEIGHTS
        cfg.NUM_GPUS = 1
        assert_and_infer_cfg(cache_urls=False)
        model = model_engine.initialize_model_from_cfg(weights_file)
        with c2_utils.NamedCudaScope(0):
            cls_boxes_, cls_segms_, cls_keyps_ = \
                model_engine.im_detect_all(model, im, proposal_boxes)
        cls_boxes = cls_boxes_ if cls_boxes_ is not None else cls_boxes
        cls_segms = cls_segms_ if cls_segms_ is not None else cls_segms
        cls_keyps = cls_keyps_ if cls_keyps_ is not None else cls_keyps
        workspace.ResetWorkspace()

    out_name = os.path.join(
        args.output_dir, os.path.basename(args.im_file) + '.pdf'
    )
    logger.info('Processing {} -> {}'.format(args.im_file, out_name))

    vis_utils.vis_one_image(
        im[:, :, ::-1],
        args.im_file,
        args.output_dir,
        cls_boxes,
        cls_segms,
        cls_keyps,
        dataset=dummy_coco_dataset,
        box_alpha=0.3,
        show_class=True,
        thresh=0.7,
        kp_thresh=2
    )


def check_args(args):
    """Validate parsed arguments and resolve model files through the cache.

    ``args.rpn_pkl`` and every even-indexed entry of ``args.models_to_run``
    are passed through ``cache_url`` and replaced in place by its result.

    Args:
        args: namespace produced by ``parse_args``; modified in place.

    Raises:
        AssertionError: if no input image is given, if only one of
            --rpn-pkl / --rpn-cfg is given, if the positional arguments do
            not come in pairs, or if a referenced file does not exist.
    """
    assert args.im_file is not None, \
        'An input image must be specified with --im'
    assert (args.rpn_pkl is None) == (args.rpn_cfg is None), \
        '--rpn-pkl and --rpn-cfg must be specified together'
    if args.rpn_pkl is not None:
        args.rpn_pkl = cache_url(args.rpn_pkl, cfg.DOWNLOAD_CACHE)
        assert os.path.exists(args.rpn_pkl), \
            '\'{}\' does not exist'.format(args.rpn_pkl)
        assert os.path.exists(args.rpn_cfg), \
            '\'{}\' does not exist'.format(args.rpn_cfg)
    if args.models_to_run is not None:
        assert len(args.models_to_run) % 2 == 0, \
            'Models must be given as [pkl] [yaml] pairs'
        for i, model_file in enumerate(args.models_to_run):
            if len(model_file) > 0:
                if i % 2 == 0:
                    # Even positions hold weights, which go through the cache.
                    model_file = cache_url(model_file, cfg.DOWNLOAD_CACHE)
                    args.models_to_run[i] = model_file
                assert os.path.exists(model_file), \
                    '\'{}\' does not exist'.format(model_file)


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    setup_logging(__name__)
    args = parse_args()
    check_args(args)
    main(args)
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Perform inference on a single image or all images with a certain extension (e.g., .jpg) in a folder. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from collections import defaultdict import argparse import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) import glob import logging import os import sys import time from caffe2.python import workspace from detectron.core.config import assert_and_infer_cfg from detectron.core.config import cfg from detectron.core.config import merge_cfg_from_file from detectron.utils.io import cache_url from detectron.utils.logging import setup_logging from detectron.utils.timer import Timer import detectron.core.test_engine as infer_engine import detectron.datasets.dummy_datasets as dummy_datasets import detectron.utils.c2 as c2_utils import detectron.utils.vis as vis_utils c2_utils.import_detectron_ops() # OpenCL may be enabled by default in OpenCV3; disable it because it's not # thread safe and causes unwanted GPU memory allocations. 
def parse_args():
    """Parse the command line of the simple end-to-end inference script.

    Prints the usage text and exits with status 1 when the script is invoked
    without any argument.

    Returns:
        argparse.Namespace with ``cfg``, ``weights``, ``output_dir``,
        ``image_ext``, ``out_when_no_box``, ``output_ext``, ``thresh``,
        ``kp_thresh`` and the positional ``im_or_folder``.
    """
    parser = argparse.ArgumentParser(description='End-to-end inference')
    parser.add_argument(
        '--cfg',
        dest='cfg',
        help='cfg model file (/path/to/model_config.yaml)',
        default=None,
        type=str
    )
    parser.add_argument(
        '--wts',
        dest='weights',
        help='weights model file (/path/to/model_weights.pkl)',
        default=None,
        type=str
    )
    parser.add_argument(
        '--output-dir',
        dest='output_dir',
        help='directory for visualization pdfs (default: /tmp/infer_simple)',
        default='/tmp/infer_simple',
        type=str
    )
    parser.add_argument(
        '--image-ext',
        dest='image_ext',
        help='image file name extension (default: jpg)',
        default='jpg',
        type=str
    )
    parser.add_argument(
        '--always-out',
        dest='out_when_no_box',
        help='output image even when no object is found',
        action='store_true'
    )
    parser.add_argument(
        '--output-ext',
        dest='output_ext',
        help='output image file format (default: pdf)',
        default='pdf',
        type=str
    )
    parser.add_argument(
        '--thresh',
        dest='thresh',
        help='Threshold for visualizing detections',
        default=0.7,
        type=float
    )
    parser.add_argument(
        '--kp-thresh',
        dest='kp_thresh',
        help='Threshold for visualizing keypoints',
        default=2.0,
        type=float
    )
    parser.add_argument(
        'im_or_folder', help='image or folder of images', default=None
    )
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


def main(args):
    """Run one model on an image (or a folder of images) and visualize it.

    When ``args.im_or_folder`` is a directory, every file in it matching
    ``*.<args.image_ext>`` is processed; otherwise the path itself is treated
    as the single input image.

    Args:
        args: namespace produced by ``parse_args``. ``args.weights`` is
            replaced in place by the result of ``cache_url``.

    Raises:
        AssertionError: if the config describes an RPN-only model or one that
            requires precomputed proposals.
        IOError: if an input image cannot be read.
    """
    logger = logging.getLogger(__name__)

    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'

    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, os.path.basename(im_name) + '.' + args.output_ext
        )
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Failed to read image \'{}\''.format(im_name))
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers
            )
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            # The backslash is escaped explicitly; the emitted text is
            # unchanged but no longer relies on an invalid escape sequence.
            logger.info(
                ' \\ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)'
            )

        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=args.thresh,
            kp_thresh=args.kp_thresh,
            ext=args.output_ext,
            out_when_no_box=args.out_when_no_box
        )


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    setup_logging(__name__)
    args = parse_args()
    main(args)
def parse_args():
    """Parse the command line of the Caffe weight conversion script.

    Prints the usage text and exits with status 1 when the script is invoked
    without any argument.

    Returns:
        argparse.Namespace with ``prototxt_file_name``,
        ``caffemodel_file_name`` and ``out_file_name``.
    """
    parser = argparse.ArgumentParser(
        description='Dump weights from a Caffe model'
    )
    parser.add_argument(
        '--prototxt',
        dest='prototxt_file_name',
        help='Network definition prototxt file path',
        default=None,
        type=str
    )
    parser.add_argument(
        '--caffemodel',
        dest='caffemodel_file_name',
        help='Pretrained network weights file path',
        default=None,
        type=str
    )
    parser.add_argument(
        '--output',
        dest='out_file_name',
        help='Output file path',
        default=None,
        type=str
    )
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


def normalize_resnet_name(name):
    """Rewrite a letter-indexed ResNet blob name into numeric-index form.

    Names that start with ``res`` and do not contain ``res_`` are rewritten;
    every other name is returned unchanged. Examples:

        res4b11_branch2c -> res4_11_branch2c
        res2a_branch1    -> res2_0_branch1

    Args:
        name: blob name.

    Returns:
        The normalized blob name.
    """
    if name.startswith('res') and 'res_' not in name:
        sep = name.find('_')
        # Stage digit plus block id, e.g. "4b11" or "2a".
        chunk = name[len('res'):sep]
        if len(chunk) > 2:
            # Letter followed by a number, e.g. "b11" -> 11
            block_index = int(chunk[2:])
        else:
            # Bare letter, e.g. "a" -> 0
            block_index = ord(chunk[1]) - ord('a')
        name = 'res' + chunk[0] + '_' + str(block_index) + name[sep:]
    return name


def pickle_weights(out_file_name, weights):
    """Save the translated weights as a name -> numpy array dictionary.

    Blob names are passed through ``normalize_resnet_name`` first. The sorted
    list of written names is printed afterwards.

    Args:
        out_file_name: destination path handed to ``save_object``.
        weights: object whose ``protos`` are Caffe2 tensor protos.
    """
    blobs = {
        normalize_resnet_name(blob.name): utils.Caffe2TensorToNumpyArray(blob)
        for blob in weights.protos
    }
    save_object(blobs, out_file_name)
    print('Wrote blobs:')
    print(sorted(blobs.keys()))


def add_missing_biases(caffenet_weights):
    """Append an all-zero bias blob to Convolution layers that have none.

    A layer counts as bias-free when it holds exactly one blob. The new blob
    has one zero per filter, where the filter count is the first dimension of
    the existing blob's shape.

    Args:
        caffenet_weights: Caffe NetParameter with weights; modified in place.
    """
    for layer in caffenet_weights.layer:
        if layer.type == 'Convolution' and len(layer.blobs) == 1:
            num_filters = layer.blobs[0].shape.dim[0]
            bias_blob = caffe_pb2.BlobProto()
            bias_blob.data.extend(np.zeros(num_filters))
            bias_blob.num, bias_blob.channels, bias_blob.height = 1, 1, 1
            bias_blob.width = num_filters
            layer.blobs.extend([bias_blob])


def remove_spatial_bn_layers(caffenet, caffenet_weights):
    """Strip BatchNorm/Scale layers and fold each pair into scale/bias tensors.

    Both protos are modified in place. The removed weight layers are consumed
    as consecutive (BatchNorm, Scale) pairs; each pair yields two tensors
    named ``res<suffix>_bn_s`` and ``res<suffix>_bn_b``.

    Args:
        caffenet: Caffe NetParameter holding the network definition.
        caffenet_weights: Caffe NetParameter holding the weights.

    Returns:
        List of Caffe2 TensorProto objects, two per folded pair.

    Raises:
        AssertionError: if a BatchNorm/Scale pair does not share a name
            suffix, or a tensor's data size disagrees with its shape.
    """
    # Layer types associated with spatial batch norm
    remove_types = ['BatchNorm', 'Scale']

    def _remove_layers(net):
        # Walk backwards so popping does not shift indices still to visit.
        for i in reversed(range(len(net.layer))):
            if net.layer[i].type in remove_types:
                net.layer.pop(i)

    # First remove layers from caffenet proto
    _remove_layers(caffenet)
    # We'll return these so we can save the batch norm parameters
    bn_layers = [
        layer for layer in caffenet_weights.layer
        if layer.type in remove_types
    ]
    _remove_layers(caffenet_weights)

    def _create_tensor(arr, shape, name):
        t = caffe2_pb2.TensorProto()
        t.name = name
        t.data_type = caffe2_pb2.TensorProto.FLOAT
        t.dims.extend(shape.dim)
        t.float_data.extend(arr)
        assert len(t.float_data) == np.prod(t.dims), 'Data size, shape mismatch'
        return t

    bn_tensors = []
    for (bn, scl) in zip(bn_layers[0::2], bn_layers[1::2]):
        assert bn.name[len('bn'):] == scl.name[len('scale'):], 'Pair mismatch'
        blob_out = 'res' + bn.name[len('bn'):] + '_bn'
        bn_mean = np.asarray(bn.blobs[0].data)
        bn_var = np.asarray(bn.blobs[1].data)
        scale = np.asarray(scl.blobs[0].data)
        bias = np.asarray(scl.blobs[1].data)
        # Fold normalization into one affine transform:
        #   y = scale * (x - mean) / std + bias = new_scale * x + new_bias
        std = np.sqrt(bn_var + 1e-5)
        new_scale = scale / std
        new_bias = bias - bn_mean * scale / std
        new_scale_tensor = _create_tensor(
            new_scale, bn.blobs[0].shape, blob_out + '_s'
        )
        new_bias_tensor = _create_tensor(
            new_bias, bn.blobs[0].shape, blob_out + '_b'
        )
        bn_tensors.extend([new_scale_tensor, new_bias_tensor])
    return bn_tensors


def remove_layers_without_parameters(caffenet, caffenet_weights):
    """Drop every layer that carries no blobs from both protos.

    For each blob-less layer in ``caffenet_weights`` the first layer with the
    same name is removed from ``caffenet``. A warning is printed when no such
    layer exists, unless the name ends in ``_split``.

    Args:
        caffenet: Caffe NetParameter holding the network definition.
        caffenet_weights: Caffe NetParameter holding the weights.
    """
    # Walk backwards so popping does not shift indices still to visit.
    for i in reversed(range(len(caffenet_weights.layer))):
        if len(caffenet_weights.layer[i].blobs) == 0:
            # Search for the corresponding layer in caffenet and remove it
            name = caffenet_weights.layer[i].name
            found = False
            for j in range(len(caffenet.layer)):
                if caffenet.layer[j].name == name:
                    caffenet.layer.pop(j)
                    found = True
                    break
            if not found and name[-len('_split'):] != '_split':
                print('Warning: layer {} not found in caffenet'.format(name))
            caffenet_weights.layer.pop(i)


def normalize_shape(caffenet_weights):
    """Fill in num/channels/height/width for blobs described by shape.dim.

    A blob is rewritten only when its data length disagrees with the product
    of its four legacy dimensions. 1-D and 2-D shapes are left-padded with
    ones to four dimensions.

    Args:
        caffenet_weights: Caffe NetParameter with weights; modified in place.

    Raises:
        AssertionError: if a rewritten shape does not end up 4-dimensional.
    """
    for layer in caffenet_weights.layer:
        for blob in layer.blobs:
            shape = (blob.num, blob.channels, blob.height, blob.width)
            if len(blob.data) != np.prod(shape):
                shape = tuple(blob.shape.dim)
                if len(shape) == 1:
                    # Handle biases
                    shape = (1, 1, 1, shape[0])
                if len(shape) == 2:
                    # Handle InnerProduct layers
                    shape = (1, 1, shape[0], shape[1])
                assert len(shape) == 4
                blob.num, blob.channels, blob.height, blob.width = shape


def load_and_convert_caffe_model(prototxt_file_name, caffemodel_file_name):
    """Load a Caffe model from disk and translate it to Caffe2.

    Args:
        prototxt_file_name: path of the text-format network definition.
        caffemodel_file_name: path of the binary serialized weights.

    Returns:
        Tuple ``(net, pretrained_weights)`` from
        ``caffe_translator.TranslateModel``, with the folded batch norm
        tensors appended to ``pretrained_weights.protos``.
    """
    caffenet = caffe_pb2.NetParameter()
    caffenet_weights = caffe_pb2.NetParameter()
    with open(prototxt_file_name) as prototxt_file:
        text_format.Merge(prototxt_file.read(), caffenet)
    # The caffemodel is a serialized protobuf, so it is read as raw bytes.
    with open(caffemodel_file_name, 'rb') as caffemodel_file:
        caffenet_weights.ParseFromString(caffemodel_file.read())
    # C2 conv layers currently require biases, but they are optional in C1
    # Add zeros as biases if they are missing
    add_missing_biases(caffenet_weights)
    # We only care about getting parameters, so remove layers w/o parameters
    remove_layers_without_parameters(caffenet, caffenet_weights)
    # BatchNorm is not implemented in the translator *and* we need to fold Scale
    # layers into the new C2 SpatialBN op, hence we remove the batch norm layers
    # and apply custom translation code
    bn_weights = remove_spatial_bn_layers(caffenet, caffenet_weights)
    # Set num, channel, height and width for blobs that use shape.dim instead
    normalize_shape(caffenet_weights)
    # Translate the rest of the model
    net, pretrained_weights = caffe_translator.TranslateModel(
        caffenet, caffenet_weights
    )
    pretrained_weights.protos.extend(bn_weights)
    return net, pretrained_weights


if __name__ == '__main__':
    args = parse_args()
    assert os.path.exists(args.prototxt_file_name), \
        'Prototxt file does not exist'
    assert os.path.exists(args.caffemodel_file_name), \
        'Weights file does not exist'
    net, weights = load_and_convert_caffe_model(
        args.prototxt_file_name, args.caffemodel_file_name
    )
    pickle_weights(args.out_file_name, weights)
def parse_args():
    """Build the re-evaluation parser and parse ``sys.argv``.

    With no command-line argument at all, the usage text is printed and the
    process exits with status 1.

    Returns:
        argparse.Namespace with ``output_dir`` (a one-element list),
        ``dataset_name``, ``matlab_eval``, ``comp_mode`` and ``cfg_file``.
    """
    parser = argparse.ArgumentParser(description='Re-evaluate results')
    add = parser.add_argument
    add('output_dir', nargs=1, help='results directory', type=str)
    add(
        '--dataset',
        dest='dataset_name',
        help='dataset to re-evaluate',
        default='voc_2007_test',
        type=str
    )
    add(
        '--matlab',
        dest='matlab_eval',
        help='use matlab for evaluation',
        action='store_true'
    )
    add('--comp', dest='comp_mode', help='competition mode', action='store_true')
    add(
        '--cfg',
        dest='cfg_file',
        help='optional config file',
        default=None,
        type=str
    )

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()


def do_reval(dataset_name, output_dir, args):
    """Re-run the evaluation on detections previously saved to disk.

    Loads ``detections.pkl`` from ``output_dir``, merges the config stored in
    it into the global config, evaluates the stored boxes/segms/keypoints and
    logs the results in copy-paste friendly form.

    Args:
        dataset_name: name handed to ``JsonDataset``.
        output_dir: directory containing ``detections.pkl``; also passed to
            the evaluation as its output directory.
        args: parsed arguments; ``cfg_file`` and ``matlab_eval`` are read.
    """
    dataset = JsonDataset(dataset_name)
    detections_path = os.path.join(output_dir, 'detections.pkl')
    dets = load_object(detections_path)

    # Override config with the one saved in the detections file.
    # NOTE(review): args.cfg_file only selects which merge helper is called;
    # the file it names is never loaded here -- confirm this is intended.
    saved_cfg = core_config.load_cfg(dets['cfg'])
    if args.cfg_file is None:
        core_config._merge_a_into_b(saved_cfg, cfg)
    else:
        core_config.merge_cfg_from_cfg(saved_cfg)

    results = task_evaluation.evaluate_all(
        dataset,
        dets['all_boxes'],
        dets['all_segms'],
        dets['all_keyps'],
        output_dir,
        use_matlab=args.matlab_eval
    )
    task_evaluation.log_copy_paste_friendly_results(results)


if __name__ == '__main__':
    setup_logging(__name__)
    args = parse_args()
    if args.comp_mode:
        cfg.TEST.COMPETITION_MODE = True
    # nargs=1 yields a one-element list, hence the [0].
    results_dir = os.path.abspath(args.output_dir[0])
    do_reval(args.dataset_name, results_dir, args)
================================================ FILE: tools/test_net.py ================================================ #!/usr/bin/env python # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Perform inference on one or more datasets.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import argparse import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) import os import pprint import sys import time from caffe2.python import workspace from detectron.core.config import assert_and_infer_cfg from detectron.core.config import cfg from detectron.core.config import merge_cfg_from_file from detectron.core.config import merge_cfg_from_list from detectron.core.test_engine import run_inference from detectron.utils.logging import setup_logging import detectron.utils.c2 as c2_utils c2_utils.import_detectron_ops() # OpenCL may be enabled by default in OpenCV3; disable it because it's not # thread safe and causes unwanted GPU memory allocations. 
def _str_to_bool(value):
    """Convert a command-line token into a bool.

    ``type=bool`` cannot be used with argparse for this purpose: it calls
    ``bool()`` on the raw string, so any non-empty text -- including
    "False" -- evaluates to True.

    Args:
        value: the raw argument string (or an actual bool).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognized boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays False, as it was under type=bool.
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got \'{}\''.format(value)
    )


def parse_args():
    """Parse the command line of the dataset inference script.

    Prints the usage text and exits with status 1 when the script is invoked
    without any argument.

    Returns:
        argparse.Namespace with ``cfg_file``, ``wait`` (bool, default True),
        ``vis``, ``multi_gpu_testing``, ``range`` (two ints or None) and
        ``opts`` (all trailing arguments).
    """
    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument(
        '--cfg',
        dest='cfg_file',
        help='optional config file',
        default=None,
        type=str
    )
    parser.add_argument(
        '--wait',
        dest='wait',
        help='wait until net file exists',
        default=True,
        type=_str_to_bool
    )
    parser.add_argument(
        '--vis', dest='vis', help='visualize detections', action='store_true'
    )
    parser.add_argument(
        '--multi-gpu-testing',
        dest='multi_gpu_testing',
        help='using cfg.NUM_GPUS for inference',
        action='store_true'
    )
    parser.add_argument(
        '--range',
        dest='range',
        help='start (inclusive) and end (exclusive) indices',
        default=None,
        type=int,
        nargs=2
    )
    parser.add_argument(
        'opts',
        help='See detectron/core/config.py for all options',
        default=None,
        nargs=argparse.REMAINDER
    )
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    logger = setup_logging(__name__)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    logger.info('Testing with config:')
    logger.info(pprint.pformat(cfg))

    # Poll every 10 seconds until the weights file exists, unless waiting
    # was disabled with --wait.
    while not os.path.exists(cfg.TEST.WEIGHTS) and args.wait:
        logger.info('Waiting for \'{}\' to exist...'.format(cfg.TEST.WEIGHTS))
        time.sleep(10)

    run_inference(
        cfg.TEST.WEIGHTS,
        ind_range=args.range,
        multi_gpu_testing=args.multi_gpu_testing,
        check_expected_results=True,
    )
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Train a network with Detectron.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import argparse import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) import logging import numpy as np import pprint import sys from caffe2.python import workspace from detectron.core.config import assert_and_infer_cfg from detectron.core.config import cfg from detectron.core.config import merge_cfg_from_file from detectron.core.config import merge_cfg_from_list from detectron.core.test_engine import run_inference from detectron.utils.logging import setup_logging import detectron.utils.c2 as c2_utils import detectron.utils.train c2_utils.import_contrib_ops() c2_utils.import_detectron_ops() # OpenCL may be enabled by default in OpenCV3; disable it because it's not # thread safe and causes unwanted GPU memory allocations. 
cv2.ocl.setUseOpenCL(False) def parse_args(): parser = argparse.ArgumentParser( description='Train a network with Detectron' ) parser.add_argument( '--cfg', dest='cfg_file', help='Config file for training (and optionally testing)', default=None, type=str ) parser.add_argument( '--multi-gpu-testing', dest='multi_gpu_testing', help='Use cfg.NUM_GPUS GPUs for inference', action='store_true' ) parser.add_argument( '--skip-test', dest='skip_test', help='Do not test the final model', action='store_true' ) parser.add_argument( 'opts', help='See detectron/core/config.py for all options', default=None, nargs=argparse.REMAINDER ) if len(sys.argv) == 1: parser.print_help() sys.exit(1) return parser.parse_args() def main(): # Initialize C2 workspace.GlobalInit( ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'] ) # Set up logging and load config options logger = setup_logging(__name__) logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO) args = parse_args() logger.info('Called with args:') logger.info(args) if args.cfg_file is not None: merge_cfg_from_file(args.cfg_file) if args.opts is not None: merge_cfg_from_list(args.opts) assert_and_infer_cfg() smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info() logger.info("cuda version : {}".format(cuda_ver)) logger.info("cudnn version: {}".format(cudnn_ver)) logger.info("nvidia-smi output:\n{}".format(smi_output)) logger.info('Training with config:') logger.info(pprint.pformat(cfg)) # Note that while we set the numpy random seed network training will not be # deterministic in general. There are sources of non-determinism that cannot # be removed with a reasonble execution-speed tradeoff (such as certain # non-deterministic cudnn functions). 
np.random.seed(cfg.RNG_SEED) # Execute the training run checkpoints = detectron.utils.train.train_model() # Test the trained model if not args.skip_test: test_model(checkpoints['final'], args.multi_gpu_testing, args.opts) def test_model(model_file, multi_gpu_testing, opts=None): """Test a model.""" # Clear memory before inference workspace.ResetWorkspace() # Run inference run_inference( model_file, multi_gpu_testing=multi_gpu_testing, check_expected_results=True, ) if __name__ == '__main__': main() ================================================ FILE: tools/visualize_results.py ================================================ #!/usr/bin/env python # Copyright (c) 2017-present, Facebook, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## """Script for visualizing results saved in a detections.pkl file.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import argparse import cv2 import os import sys from detectron.datasets.json_dataset import JsonDataset from detectron.utils.io import load_object import detectron.utils.vis as vis_utils # OpenCL may be enabled by default in OpenCV3; disable it because it's not # thread safe and causes unwanted GPU memory allocations. 
def parse_args():
    """Build the visualization parser and parse ``sys.argv``.

    With no command-line argument at all, the usage text is printed and the
    process exits with status 1.

    Returns:
        argparse.Namespace with ``dataset``, ``detections``, ``thresh``,
        ``output_dir`` and ``first``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add(
        '--dataset',
        dest='dataset',
        help='dataset',
        default='coco_2014_minival',
        type=str
    )
    add(
        '--detections',
        dest='detections',
        help='detections pkl file',
        default='',
        type=str
    )
    add(
        '--thresh',
        dest='thresh',
        help='detection prob threshold',
        default=0.9,
        type=float
    )
    add(
        '--output-dir',
        dest='output_dir',
        help='output directory',
        default='./tmp/vis-output',
        type=str
    )
    add(
        '--first',
        dest='first',
        help='only visualize the first k images',
        default=0,
        type=int
    )

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()


def vis(dataset, detections_pkl, thresh, output_dir, limit=0):
    """Render saved detections for the images of a dataset.

    One visualization per roidb entry is written under ``<output_dir>/vis``.
    A progress line is printed for every tenth image.

    Args:
        dataset: dataset name handed to ``JsonDataset``.
        detections_pkl: path of a pickle holding the keys ``all_boxes``,
            ``all_segms`` and ``all_keyps``.
        thresh: threshold forwarded to ``vis_utils.vis_one_image``.
        output_dir: parent of the ``vis`` output directory.
        limit: when positive, only the first ``limit`` images are rendered.

    Raises:
        AssertionError: if the pickle lacks one of the expected keys.
    """
    ds = JsonDataset(dataset)
    roidb = ds.get_roidb()

    dets = load_object(detections_pkl)

    required_keys = ['all_boxes', 'all_segms', 'all_keyps']
    assert all(key in dets for key in required_keys), \
        'Expected detections pkl file in the format used by test_engine.py'

    all_boxes = dets['all_boxes']
    all_segms = dets['all_segms']
    all_keyps = dets['all_keyps']

    def _for_image(per_class, image_ix):
        # Empty per-class containers are passed through as they are; all
        # others are indexed by image.
        return [
            entries if len(entries) == 0 else entries[image_ix]
            for entries in per_class
        ]

    vis_dir = os.path.join(output_dir, 'vis')
    for ix, entry in enumerate(roidb):
        if limit > 0 and ix >= limit:
            break
        if ix % 10 == 0:
            print('{:d}/{:d}'.format(ix + 1, len(roidb)))

        image_path = entry['image']
        im = cv2.imread(image_path)
        im_name = os.path.splitext(os.path.basename(image_path))[0]

        cls_boxes_i = _for_image(all_boxes, ix)
        cls_segms_i = _for_image(all_segms, ix)
        cls_keyps_i = _for_image(all_keyps, ix)

        vis_utils.vis_one_image(
            im[:, :, ::-1],  # reverse the channel axis for visualization
            '{:d}_{:s}'.format(ix, im_name),
            vis_dir,
            cls_boxes_i,
            segms=cls_segms_i,
            keypoints=cls_keyps_i,
            thresh=thresh,
            box_alpha=0.8,
            dataset=ds,
            show_class=True
        )


if __name__ == '__main__':
    opts = parse_args()
    vis(
        opts.dataset,
        opts.detections,
        opts.thresh,
        opts.output_dir,
        limit=opts.first
    )