Repository: xingyizhou/CenterNet
Branch: master
Commit: 4c50fd3a46bd
Files: 156
Total size: 1.2 MB
Directory structure:
gitextract_hokebfv4/
├── .gitignore
├── .travis.yml
├── LICENSE
├── NOTICE
├── README.md
├── data/
│ └── .gitignore
├── exp/
│ └── .gitignore
├── experiments/
│ ├── ctdet_coco_dla_1x.sh
│ ├── ctdet_coco_dla_2x.sh
│ ├── ctdet_coco_hg.sh
│ ├── ctdet_coco_resdcn101.sh
│ ├── ctdet_coco_resdcn18.sh
│ ├── ctdet_pascal_dla_384.sh
│ ├── ctdet_pascal_dla_512.sh
│ ├── ctdet_pascal_resdcn101_384.sh
│ ├── ctdet_pascal_resdcn101_512.sh
│ ├── ctdet_pascal_resdcn18_384.sh
│ ├── ctdet_pascal_resdcn18_512.sh
│ ├── ddd_3dop.sh
│ ├── ddd_sub.sh
│ ├── exdet_coco_dla.sh
│ ├── exdet_coco_hg.sh
│ ├── multi_pose_dla_1x.sh
│ ├── multi_pose_dla_3x.sh
│ ├── multi_pose_hg_1x.sh
│ └── multi_pose_hg_3x.sh
├── images/
│ └── NOTICE
├── models/
│ └── .gitignore
├── readme/
│ ├── DATA.md
│ ├── DEVELOP.md
│ ├── GETTING_STARTED.md
│ ├── INSTALL.md
│ └── MODEL_ZOO.md
├── requirements.txt
└── src/
├── _init_paths.py
├── demo.py
├── lib/
│ ├── datasets/
│ │ ├── dataset/
│ │ │ ├── coco.py
│ │ │ ├── coco_hp.py
│ │ │ ├── kitti.py
│ │ │ └── pascal.py
│ │ ├── dataset_factory.py
│ │ └── sample/
│ │ ├── ctdet.py
│ │ ├── ddd.py
│ │ ├── exdet.py
│ │ └── multi_pose.py
│ ├── detectors/
│ │ ├── base_detector.py
│ │ ├── ctdet.py
│ │ ├── ddd.py
│ │ ├── detector_factory.py
│ │ ├── exdet.py
│ │ └── multi_pose.py
│ ├── external/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── __init__.py
│ │ ├── nms.pyx
│ │ └── setup.py
│ ├── logger.py
│ ├── models/
│ │ ├── data_parallel.py
│ │ ├── decode.py
│ │ ├── losses.py
│ │ ├── model.py
│ │ ├── networks/
│ │ │ ├── DCNv2/
│ │ │ │ ├── .gitignore
│ │ │ │ ├── LICENSE
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── build.py
│ │ │ │ ├── build_double.py
│ │ │ │ ├── dcn_v2.py
│ │ │ │ ├── dcn_v2_func.py
│ │ │ │ ├── make.sh
│ │ │ │ ├── src/
│ │ │ │ │ ├── cuda/
│ │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu
│ │ │ │ │ │ ├── dcn_v2_im2col_cuda.h
│ │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.cu
│ │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.h
│ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu
│ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.h
│ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda_double.cu
│ │ │ │ │ │ └── dcn_v2_psroi_pooling_cuda_double.h
│ │ │ │ │ ├── dcn_v2.c
│ │ │ │ │ ├── dcn_v2.h
│ │ │ │ │ ├── dcn_v2_cuda.c
│ │ │ │ │ ├── dcn_v2_cuda.h
│ │ │ │ │ ├── dcn_v2_cuda_double.c
│ │ │ │ │ ├── dcn_v2_cuda_double.h
│ │ │ │ │ ├── dcn_v2_double.c
│ │ │ │ │ └── dcn_v2_double.h
│ │ │ │ └── test.py
│ │ │ ├── dlav0.py
│ │ │ ├── large_hourglass.py
│ │ │ ├── msra_resnet.py
│ │ │ ├── pose_dla_dcn.py
│ │ │ └── resnet_dcn.py
│ │ ├── scatter_gather.py
│ │ └── utils.py
│ ├── opts.py
│ ├── trains/
│ │ ├── base_trainer.py
│ │ ├── ctdet.py
│ │ ├── ddd.py
│ │ ├── exdet.py
│ │ ├── multi_pose.py
│ │ └── train_factory.py
│ └── utils/
│ ├── __init__.py
│ ├── ddd_utils.py
│ ├── debugger.py
│ ├── image.py
│ ├── oracle_utils.py
│ ├── post_process.py
│ └── utils.py
├── main.py
├── test.py
└── tools/
├── _init_paths.py
├── calc_coco_overlap.py
├── convert_hourglass_weight.py
├── convert_kitti_to_coco.py
├── eval_coco.py
├── eval_coco_hp.py
├── get_kitti.sh
├── get_pascal_voc.sh
├── kitti_eval/
│ ├── README.md
│ ├── evaluate_object_3d.cpp
│ ├── evaluate_object_3d_offline
│ ├── evaluate_object_3d_offline.cpp
│ └── mail.h
├── merge_pascal_json.py
├── reval.py
├── vis_pred.py
└── voc_eval_lib/
├── LICENSE
├── Makefile
├── __init__.py
├── datasets/
│ ├── __init__.py
│ ├── bbox.pyx
│ ├── ds_utils.py
│ ├── imdb.py
│ ├── pascal_voc.py
│ └── voc_eval.py
├── model/
│ ├── __init__.py
│ ├── bbox_transform.py
│ ├── config.py
│ ├── nms_wrapper.py
│ └── test.py
├── nms/
│ ├── .gitignore
│ ├── __init__.py
│ ├── cpu_nms.c
│ ├── cpu_nms.pyx
│ ├── gpu_nms.cpp
│ ├── gpu_nms.hpp
│ ├── gpu_nms.pyx
│ ├── nms_kernel.cu
│ └── py_cpu_nms.py
├── setup.py
└── utils/
├── .gitignore
├── __init__.py
├── bbox.pyx
├── blob.py
├── timer.py
└── visualization.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
legacy/*
.DS_Store
debug/*
*.DS_Store
*.json
*.mat
src/.vscode/*
preds/*
*.h5
*.pth
*.checkpoint
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
================================================
FILE: .travis.yml
================================================
group: travis_latest
dist: xenial # ubuntu-16.04
language: python
cache: pip
python:
- 3.6
- 3.7
install:
- pip install flake8
- pip install -r requirements.txt
before_script:
# stop the build if there are Python syntax errors or undefined names
- flake8 . --count --select=E9,F63,F72,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
script:
- true # add other tests here
notifications:
on_success: change
on_failure: change # `always` will be the setting once code changes slow down
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2019 Xingyi Zhou
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: NOTICE
================================================
Portions of this software are derived from tf-faster-rcnn.
==============================================================================
tf-faster-rcnn licence
==============================================================================
MIT License
Copyright (c) 2017 Xinlei Chen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Portions of this software are derived from human-pose-estimation.pytorch.
==============================================================================
human-pose-estimation.pytorch licence
==============================================================================
MIT License
Copyright (c) Microsoft Corporation. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE
Portions of this software are derived from CornerNet.
==============================================================================
CornerNet licence
==============================================================================
BSD 3-Clause License
Copyright (c) 2018, University of Michigan
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
Portions of this software are derived from DCNv2.
==============================================================================
DCNv2 licence
==============================================================================
BSD 3-Clause License
Copyright (c) 2019, Charles Shang
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
==============================================================================
DLA licence
==============================================================================
BSD 3-Clause License
Copyright (c) 2018, Fisher Yu
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
# Objects as Points
Object detection, 3D detection, and pose estimation using center point detection:

> [**Objects as Points**](http://arxiv.org/abs/1904.07850),
> Xingyi Zhou, Dequan Wang, Philipp Krähenbühl,
> *arXiv technical report ([arXiv 1904.07850](http://arxiv.org/abs/1904.07850))*
Contact: [zhouxy2017@gmail.com](mailto:zhouxy2017@gmail.com). Any questions or discussions are welcomed!
## Updates
- (June, 2020) We released a state-of-the-art Lidar-based 3D detection and tracking framework [CenterPoint](https://github.com/tianweiy/CenterPoint).
- (April, 2020) We released a state-of-the-art (multi-category-/ pose-/ 3d-) tracking extension [CenterTrack](https://github.com/xingyizhou/CenterTrack).
## Abstract
Detection identifies objects as axis-aligned boxes in an image. Most successful object detectors enumerate a nearly exhaustive list of potential object locations and classify each. This is wasteful, inefficient, and requires additional post-processing. In this paper, we take a different approach. We model an object as a single point -- the center point of its bounding box. Our detector uses keypoint estimation to find center points and regresses to all other object properties, such as size, 3D location, orientation, and even pose. Our center point based approach, CenterNet, is end-to-end differentiable, simpler, faster, and more accurate than corresponding bounding box based detectors. CenterNet achieves the best speed-accuracy trade-off on the MS COCO dataset, with 28.1% AP at 142 FPS, 37.4% AP at 52 FPS, and 45.1% AP with multi-scale testing at 1.4 FPS. We use the same approach to estimate 3D bounding box in the KITTI benchmark and human pose on the COCO keypoint dataset. Our method performs competitively with sophisticated multi-stage methods and runs in real-time.
## Highlights
- **Simple:** One-sentence method summary: use keypoint detection technic to detect the bounding box center point and regress to all other object properties like bounding box size, 3d information, and pose.
- **Versatile:** The same framework works for object detection, 3d bounding box estimation, and multi-person pose estimation with minor modification.
- **Fast:** The whole process in a single network feedforward. No NMS post processing is needed. Our DLA-34 model runs at *52* FPS with *37.4* COCO AP.
- **Strong**: Our best single model achieves *45.1*AP on COCO test-dev.
- **Easy to use:** We provide user friendly testing API and webcam demos.
## Main results
### Object Detection on COCO validation
| Backbone | AP / FPS | Flip AP / FPS| Multi-scale AP / FPS |
|--------------|-----------|--------------|-----------------------|
|Hourglass-104 | 40.3 / 14 | 42.2 / 7.8 | 45.1 / 1.4 |
|DLA-34 | 37.4 / 52 | 39.2 / 28 | 41.7 / 4 |
|ResNet-101 | 34.6 / 45 | 36.2 / 25 | 39.3 / 4 |
|ResNet-18 | 28.1 / 142| 30.0 / 71 | 33.2 / 12 |
### Keypoint detection on COCO validation
| Backbone | AP | FPS |
|--------------|-----------|--------------|
|Hourglass-104 | 64.0 | 6.6 |
|DLA-34 | 58.9 | 23 |
### 3D bounding box detection on KITTI validation
|Backbone|FPS|AP-E|AP-M|AP-H|AOS-E|AOS-M|AOS-H|BEV-E|BEV-M|BEV-H|
|--------|---|----|----|----|-----|-----|-----|-----|-----|-----|
|DLA-34 |32 |96.9|87.8|79.2|93.9 |84.3 |75.7 |34.0 |30.5 |26.8 |
All models and details are available in our [Model zoo](readme/MODEL_ZOO.md).
## Installation
Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions.
## Use CenterNet
We support demo for image/ image folder, video, and webcam.
First, download the models (By default, [ctdet_coco_dla_2x](https://drive.google.com/file/d/18Q3fzzAsha_3Qid6mn4jcIFPeOGUaj1d) for detection and
[multi_pose_dla_3x](https://drive.google.com/file/d/1mC2PAQT_RuHi_9ZMZgkt4rg7BSY2_Lkd) for human pose estimation)
from the [Model zoo](readme/MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`.
For object detection on images/ video, run:
~~~
python demo.py ctdet --demo /path/to/image/or/folder/or/video --load_model ../models/ctdet_coco_dla_2x.pth
~~~
We provide example images in `CenterNet_ROOT/images/` (from [Detectron](https://github.com/facebookresearch/Detectron/tree/master/demo)). If set up correctly, the output should look like
For webcam demo, run
~~~
python demo.py ctdet --demo webcam --load_model ../models/ctdet_coco_dla_2x.pth
~~~
Similarly, for human pose estimation, run:
~~~
python demo.py multi_pose --demo /path/to/image/or/folder/or/video/or/webcam --load_model ../models/multi_pose_dla_3x.pth
~~~
The result for the example images should look like:
You can add `--debug 2` to visualize the heatmap outputs.
You can add `--flip_test` for flip test.
To use this CenterNet in your own project, you can
~~~
import sys
CENTERNET_PATH = /path/to/CenterNet/src/lib/
sys.path.insert(0, CENTERNET_PATH)
from detectors.detector_factory import detector_factory
from opts import opts
MODEL_PATH = /path/to/model
TASK = 'ctdet' # or 'multi_pose' for human pose estimation
opt = opts().init('{} --load_model {}'.format(TASK, MODEL_PATH).split(' '))
detector = detector_factory[opt.task](opt)
img = image/or/path/to/your/image/
ret = detector.run(img)['results']
~~~
`ret` will be a python dict: `{category_id : [[x1, y1, x2, y2, score], ...], }`
## Benchmark Evaluation and Training
After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to setup the datasets. Then check [GETTING_STARTED.md](readme/GETTING_STARTED.md) to reproduce the results in the paper.
We provide scripts for all the experiments in the [experiments](experiments) folder.
## Develop
If you are interested in training CenterNet in a new dataset, use CenterNet in a new task, or use a new network architecture for CenterNet, please refer to [DEVELOP.md](readme/DEVELOP.md). Also feel free to send us emails for discussions or suggestions.
## Third-party resources
- CenterNet + embedding learning based tracking: [FairMOT](https://github.com/ifzhang/FairMOT) from [Yifu Zhang](https://github.com/ifzhang).
- Detectron2 based implementation: [CenterNet-better](https://github.com/FateScript/CenterNet-better) from [Feng Wang](https://github.com/FateScript).
- Keras Implementation: [keras-centernet](https://github.com/see--/keras-centernet) from [see--](https://github.com/see--) and [keras-CenterNet](https://github.com/xuannianz/keras-CenterNet) from [xuannianz](https://github.com/xuannianz).
- MXnet implementation: [mxnet-centernet](https://github.com/Guanghan/mxnet-centernet) from [Guanghan Ning](https://github.com/Guanghan).
- Stronger human open estimation models: [centerpose](https://github.com/tensorboy/centerpose) from [tensorboy](https://github.com/tensorboy).
- TensorRT extension with ONNX models: [TensorRT-CenterNet](https://github.com/CaoWGG/TensorRT-CenterNet) from [Wengang Cao](https://github.com/CaoWGG).
- CenterNet + DeepSORT tracking implementation: [centerNet-deep-sort](https://github.com/kimyoon-young/centerNet-deep-sort) from [kimyoon-young](https://github.com/kimyoon-young).
- Blogs on training CenterNet on custom datasets (in Chinese): [ships](https://blog.csdn.net/weixin_42634342/article/details/97756458) from [Rhett Chen](https://blog.csdn.net/weixin_42634342) and [faces](https://blog.csdn.net/weixin_41765699/article/details/100118353) from [linbior](https://me.csdn.net/weixin_41765699).
## License
CenterNet itself is released under the MIT License (refer to the LICENSE file for details).
Portions of the code are borrowed from [human-pose-estimation.pytorch](https://github.com/Microsoft/human-pose-estimation.pytorch) (image transform, resnet), [CornerNet](https://github.com/princeton-vl/CornerNet) (hourglassnet, loss functions), [dla](https://github.com/ucbdrive/dla) (DLA network), [DCNv2](https://github.com/CharlesShang/DCNv2)(deformable convolutions), [tf-faster-rcnn](https://github.com/endernewton/tf-faster-rcnn)(Pascal VOC evaluation) and [kitti_eval](https://github.com/prclibo/kitti_eval) (KITTI dataset evaluation). Please refer to the original License of these projects (See [NOTICE](NOTICE)).
## Citation
If you find this project useful for your research, please use the following BibTeX entry.
@inproceedings{zhou2019objects,
title={Objects as Points},
author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
booktitle={arXiv preprint arXiv:1904.07850},
year={2019}
}
================================================
FILE: data/.gitignore
================================================
*
!.gitignore
================================================
FILE: exp/.gitignore
================================================
*
!.gitignore
================================================
FILE: experiments/ctdet_coco_dla_1x.sh
================================================
cd src
# train
python main.py ctdet --exp_id coco_dla_1x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16
# test
python test.py ctdet --exp_id coco_dla_1x --keep_res --resume
# flip test
python test.py ctdet --exp_id coco_dla_1x --keep_res --resume --flip_test
# multi scale test
python test.py ctdet --exp_id coco_dla_1x --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
cd ..
================================================
FILE: experiments/ctdet_coco_dla_2x.sh
================================================
cd src
# train
python main.py ctdet --exp_id coco_dla_2x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --num_epochs 230 lr_step 180,210
# or use the following command if your have coco_s2_dla_1x trained
# python main.py ctdet --exp_id coco_dla_2x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --load_model ../exp/ctdet/coco_dla_1x/model_90.pth --resume
# test
python test.py ctdet --exp_id coco_dla_2x --keep_res --resume
# flip test
python test.py ctdet --exp_id coco_dla_2x --keep_res --resume --flip_test
# multi scale test
python test.py ctdet --exp_id coco_dla_2x --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
cd ..
================================================
FILE: experiments/ctdet_coco_hg.sh
================================================
cd src
# train
python main.py ctdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ExtremeNet_500000.pth --gpus 0,1,2,3,4
# test
python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume
# flip test
python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test
# multi scale test
python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
cd ..
================================================
FILE: experiments/ctdet_coco_resdcn101.sh
================================================
cd src
# train
python main.py ctdet --exp_id coco_resdcn101 --arch resdcn_101 --batch_size 96 --master_batch 5 --lr 3.75e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16
# test
python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume
# flip test
python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume --flip_test
# multi scale test
python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
cd ..
================================================
FILE: experiments/ctdet_coco_resdcn18.sh
================================================
cd src
# train
python main.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --batch_size 114 --master_batch 18 --lr 5e-4 --gpus 0,1,2,3 --num_workers 16
# test
python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume
# flip test
python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume --flip_test
# multi scale test
python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
cd ..
================================================
FILE: experiments/ctdet_pascal_dla_384.sh
================================================
cd src
# train
python main.py ctdet --exp_id pascal_dla_384 --dataset pascal --num_epochs 70 --lr_step 45,60
# test
python test.py ctdet --exp_id pascal_dla_384 --dataset pascal --resume
# flip test
python test.py ctdet --exp_id pascal_dla_384 --dataset pascal --resume --flip_test
cd ..
================================================
FILE: experiments/ctdet_pascal_dla_512.sh
================================================
cd src
# train
python main.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 --gpus 0,1
# test
python test.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --resume
# flip test
python test.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --resume --flip_test
cd ..
================================================
FILE: experiments/ctdet_pascal_resdcn101_384.sh
================================================
cd src
# train
python main.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --num_epochs 70 --lr_step 45,60 --gpus 0,1
# test
python test.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --resume
# flip test
python test.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --resume --flip_test
cd ..
================================================
FILE: experiments/ctdet_pascal_resdcn101_512.sh
================================================
cd src
# train
python main.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 --gpus 0,1,2,3
# test
python test.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --resume
# flip test
python test.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --resume --flip_test
cd ..
================================================
FILE: experiments/ctdet_pascal_resdcn18_384.sh
================================================
cd src
# train
python main.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --num_epochs 70 --lr_step 45,60
# test
python test.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --resume
# flip test
python test.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --resume --flip_test
cd ..
================================================
FILE: experiments/ctdet_pascal_resdcn18_512.sh
================================================
cd src
# train
python main.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60
# test
python test.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --resume
# flip test
python test.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --resume --flip_test
cd ..
================================================
FILE: experiments/ddd_3dop.sh
================================================
cd src
# train
python main.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --batch_size 16 --master_batch 7 --num_epochs 70 --lr_step 45,60 --gpus 0,1
# test
python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --resume
cd ..
================================================
FILE: experiments/ddd_sub.sh
================================================
cd src
# train
python main.py ddd --exp_id sub --dataset kitti --kitti_split subcnn --batch_size 16 --master_batch 7 --num_epochs 70 --lr_step 45,60 --gpus 0,1
# test
python test.py ddd --exp_id sub --dataset kitti --kitti_split subcnn --resume
cd ..
================================================
FILE: experiments/exdet_coco_dla.sh
================================================
cd src
# train
python main.py exdet --exp_id coco_dla --batch_size 64 --master_batch 1 --lr 2.5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 8
# test
python test.py exdet --exp_id coco_dla --keep_res --resume
# flip test
python test.py exdet --exp_id coco_dla --keep_res --resume --flip_test
# multi scale test
python test.py exdet --exp_id coco_dla --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
cd ..
================================================
FILE: experiments/exdet_coco_hg.sh
================================================
cd src
# train
python main.py exdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3,4
# test
python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume
# flip test
python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test
# multi scale test
python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
cd ..
================================================
FILE: experiments/multi_pose_dla_1x.sh
================================================
cd src
# train
python main.py multi_pose --exp_id dla_1x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --num_workers 16
# test
python test.py multi_pose --exp_id dla_1x --dataset coco_hp --keep_res --resume
# flip test
python test.py multi_pose --exp_id dla_1x --dataset coco_hp --keep_res --resume --flip_test
cd ..
================================================
FILE: experiments/multi_pose_dla_3x.sh
================================================
cd src
# train
python main.py multi_pose --exp_id dla_3x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --num_epochs 320 lr_step 270,300
# or use the following command if your have dla_1x trained
# python main.py multi_pose --exp_id dla_3x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --load_model ../exp/multi_pose/dla_1x/model_90.pth --resume
# test
python test.py multi_pose --exp_id dla_3x --dataset coco_hp --keep_res --resume
# flip test
python test.py multi_pose --exp_id dla_3x --dataset coco_hp --keep_res --resume --flip_test
cd ..
================================================
FILE: experiments/multi_pose_hg_1x.sh
================================================
cd src
# train
python main.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ctdet_coco_hg.pth --gpus 0,1,2,3,4 --num_epochs 50 --lr_step 40
# test
python test.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --keep_res --resume
# flip test
python test.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --keep_res --resume --flip_test
cd ..
================================================
FILE: experiments/multi_pose_hg_3x.sh
================================================
cd src
# train
python main.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 -load_model ../models/ctdet_coco_hg.pth --gpus 0,1,2,3,4 --num_epochs 150 --lr_step 130
# or use the following command if your have dla_1x trained
# python main.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3,4 --num_epochs 150 --lr_step 130 --load_model ../exp/multi_pose/hg_1x/model_40.pth --resume
# test
python test.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --keep_res --resume
# flip test
python test.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --keep_res --resume --flip_test
cd ..
================================================
FILE: images/NOTICE
================================================
The demo images are licensed as United States government work:
https://www.usa.gov/government-works
The image files were obtained on Jan 13, 2018 from the following
URLs.
16004479832_a748d55f21_k.jpg
https://www.flickr.com/photos/archivesnews/16004479832
18124840932_e42b3e377c_k.jpg
https://www.flickr.com/photos/usnavy/18124840932
33887522274_eebd074106_k.jpg
https://www.flickr.com/photos/usaid_pakistan/33887522274
15673749081_767a7fa63a_k.jpg
https://www.flickr.com/photos/usnavy/15673749081
34501842524_3c858b3080_k.jpg
https://www.flickr.com/photos/departmentofenergy/34501842524
24274813513_0cfd2ce6d0_k.jpg
https://www.flickr.com/photos/dhsgov/24274813513
19064748793_bb942deea1_k.jpg
https://www.flickr.com/photos/statephotos/19064748793
33823288584_1d21cf0a26_k.jpg
https://www.flickr.com/photos/cbpphotos/33823288584
17790319373_bd19b24cfc_k.jpg
https://www.flickr.com/photos/secdef/17790319373
================================================
FILE: models/.gitignore
================================================
*
!.gitignore
================================================
FILE: readme/DATA.md
================================================
# Dataset preparation
If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to setup dataset.
### COCO
- Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download).
- Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download).
- Place the data (or create symlinks) to make the data folder like:
~~~
${CenterNet_ROOT}
|-- data
`-- |-- coco
`-- |-- annotations
| |-- instances_train2017.json
| |-- instances_val2017.json
| |-- person_keypoints_train2017.json
| |-- person_keypoints_val2017.json
| |-- image_info_test-dev2017.json
|---|-- train2017
|---|-- val2017
`---|-- test2017
~~~
- [Optional] If you want to train ExtremeNet, generate extreme point annotation from segmentation:
~~~
cd $CenterNet_ROOT/tools/
python gen_coco_extreme_points.py
~~~
It generates `instances_extreme_train2017.json` and `instances_extreme_val2017.json` in `data/coco/annotations/`.
### Pascal VOC
- Run
~~~
cd $CenterNet_ROOT/tools/
bash get_pascal_voc.sh
~~~
- The above script includes:
- Download, unzip, and move Pascal VOC images from the [VOC website](http://host.robots.ox.ac.uk/pascal/VOC/).
- [Download](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) Pascal VOC annotation in COCO format (from [Detectron](https://github.com/facebookresearch/Detectron/tree/master/detectron/datasets/data)).
- Combine train/val 2007/2012 annotation files into a single json.
- Move the created `voc` folder to `data` (or create symlinks) to make the data folder like:
~~~
${CenterNet_ROOT}
|-- data
`-- |-- voc
`-- |-- annotations
| |-- pascal_trainval0712.json
| |-- pascal_test2017.json
|-- images
| |-- 000001.jpg
| ......
`-- VOCdevkit
~~~
The `VOCdevkit` folder is needed to run the evaluation script from [faster rcnn](https://github.com/rbgirshick/py-faster-rcnn/blob/master/tools/reval.py).
### KITTI
- Download [images](http://www.cvlibs.net/download.php?file=data_object_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_object_label_2.zip), and [calibrations](http://www.cvlibs.net/download.php?file=data_object_calib.zip) from [KITTI website](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) and unzip.
- Download the train-val split of [3DOP](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz) and [SubCNN](https://github.com/tanshen/SubCNN/tree/master/fast-rcnn/data/KITTI) and place the data as below
~~~
${CenterNet_ROOT}
|-- data
`-- |-- kitti
`-- |-- training
| |-- image_2
| |-- label_2
| |-- calib
|-- ImageSets_3dop
| |-- test.txt
| |-- train.txt
| |-- val.txt
| |-- trainval.txt
`-- ImageSets_subcnn
|-- test.txt
|-- train.txt
|-- val.txt
|-- trainval.txt
~~~
- Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. You can set `DEBUG=True` in `line 5` to visualize the annotation.
- Link image folder
~~~
cd ${CenterNet_ROOT}/data/kitti/
mkdir images
ln -s training/image_2 images/trainval
~~~
- The data structure should look like:
~~~
${CenterNet_ROOT}
|-- data
`-- |-- kitti
`-- |-- annotations
| |-- kitti_3dop_train.json
| |-- kitti_3dop_val.json
| |-- kitti_subcnn_train.json
| |-- kitti_subcnn_val.json
`-- images
|-- trainval
|-- test
~~~
================================================
FILE: readme/DEVELOP.md
================================================
# Develop
This document provides tutorials to develop CenterNet. `lib/src/opts` lists a few more options that the current version supports.
## New dataset
Basically there are three steps:
- Convert the dataset annotation to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example to convert kitti format to coco format.
- Create a dataset intilization file in `src/lib/datasets/dataset`. In most cases you can just copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information, and annotation path.
- Import your dataset at `src/lib/datasets/dataset_factory`.
## New task
You will need to add files to `src/lib/datasets/sample/`, `src/lib/datasets/trains/`, and `src/lib/datasets/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively.
## New architecture
- Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specify the name of each network output and its number of channels. Make sure your model returns a list (for multiple stages. Single stage model should return a list containing a single element.). The element of the list is a dict contraining the same keys with `heads`.
- Add your model in `model_factory` of `src/lib/models/model.py`.
================================================
FILE: readme/GETTING_STARTED.md
================================================
# Getting Started
This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md).
## Benchmark evaluation
First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`.
### COCO
To evaluate COCO object detection with DLA
run
~~~
python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth
~~~
This will give an AP of `37.4` if setup correctly. `--keep_res` is for keep the original image resolution. Without `--keep_res` it will resize the images to `512 x 512`. You can add `--flip_test` and `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above commend, for flip test and multi_scale test, respectively. The expected APs are `39.2` and `41.7`, respectively.
To test with hourglass net, run
~~~
python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth
~~~
Similarly, to evaluate human pose estimation, run the following command for dla
~~~
python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test
~~~
and the following for hourglass
~~~
python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test
~~~
The expected results can be found in the model zoo.
### Pascal
To evaluate object detection on Pascal VOC (test2007), run
~~~
python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test
~~~
Note that we fix the resolution during testing.
And you can change to other network architectures and resolutions by specifying `--arch` and `--input_res 512`.
### KITTI
To evaluate the kitti dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)):
~~~
cd CenterNet_ROOT/src/tools/kitti_eval
g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3
~~~
Then run the evaluation with pretrained model:
~~~
python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth
~~~
to evaluate the 3DOP split. For the subcnn split, change `--kitti_split` to `subcnn` and load the corresponding models.
Note that test time augmentation is not trivially applicable for 3D orientation.
## Training
We have packed all the training scripts in the [experiments](../experiments) folder.
The experiment names are correspond to the model name in the [model zoo](MODEL_ZOO.md).
The number of GPUs for each experiments can be found in the scripts and the model zoo.
In the case that you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) to scale the learning rate as batch size.
For example, to train COCO object detection with dla on 2 GPUs, run
~~~
python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1
~~~
The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs).
By default, pytorch evenly splits the total batch size to each GPUs.
`--master_batch` allows using different batchsize for the master GPU, which usually costs more memory than other GPUs.
If it encounters GPU memory out, using slightly less batch size (e.g., `112` of `128`) with the same learning is fine.
If the training is terminated before finishing, you can use the same commond with `--resume` to resume training. It will found the lastest model with the same `exp_id`.
Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1JMbHgN4uLkP9MAyJU5EeHrgxwe101hwO) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)).
You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)).
================================================
FILE: readme/INSTALL.md
================================================
# Installation
The code was tested on Ubuntu 16.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch]((http://pytorch.org/)) v0.4.1. NVIDIA GPUs are needed for both training and testing.
After install Anaconda:
0. [Optional but recommended] create a new conda environment.
~~~
conda create --name CenterNet python=3.6
~~~
And activate the environment.
~~~
conda activate CenterNet
~~~
1. Install pytorch0.4.1:
~~~
conda install pytorch=0.4.1 torchvision -c pytorch
~~~
And disable cudnn batch normalization(Due to [this issue](https://github.com/xingyizhou/pytorch-pose-hg-3d/issues/16)).
~~~
# PYTORCH=/path/to/pytorch # usually ~/anaconda3/envs/CenterNet/lib/python3.6/site-packages/
# for pytorch v0.4.0
sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py
# for pytorch v0.4.1
sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py
~~~
For other pytorch version, you can manually open `torch/nn/functional.py` and find the line with `torch.batch_norm` and replace the `torch.backends.cudnn.enabled` with `False`. We observed slight worse training results without doing so.
2. Install [COCOAPI](https://github.com/cocodataset/cocoapi):
~~~
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
make
python setup.py install --user
~~~
3. Clone this repo:
~~~
CenterNet_ROOT=/path/to/clone/CenterNet
git clone https://github.com/xingyizhou/CenterNet $CenterNet_ROOT
~~~
4. Install the requirements
~~~
pip install -r requirements.txt
~~~
5. Compile deformable convolutional (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)).
~~~
cd $CenterNet_ROOT/src/lib/models/networks/DCNv2
./make.sh
~~~
6. [Optional, only required if you are using extremenet or multi-scale testing] Compile NMS if your want to use multi-scale testing or test ExtremeNet.
~~~
cd $CenterNet_ROOT/src/lib/external
make
~~~
7. Download pertained models for [detection]() or [pose estimation]() and move them to `$CenterNet_ROOT/models/`. More models can be found in [Model zoo](MODEL_ZOO.md).
================================================
FILE: readme/MODEL_ZOO.md
================================================
# MODEL ZOO
### Common settings and notes
- The experiments are run with pytorch 0.4.1, CUDA 9.0, and CUDNN 7.1.
- Training times are measured on our servers with 8 TITAN V GPUs (12 GB Memeory).
- Testing times are measured on our local machine with TITAN Xp GPU.
- The models can be downloaded directly from [Google drive](https://drive.google.com/drive/folders/1S3NnppRgXea_IG4WeyquJcnOB3I6G-LX).
## Object Detection
### COCO
| Model | GPUs |Train time(h)| Test time (ms) | AP | Download |
|--------------------------|------|-------------|----------------|--------------------|-----------|
|[ctdet\_coco\_hg](../experiments/ctdet_coco_hg.sh) | 5 |109 | 71 / 129 / 674 | 40.3 / 42.2 / 45.1 | [model](https://drive.google.com/file/d/13F904yXltGIqa_K3g3-FLleaNX0n7c9O) |
|[ctdet\_coco\_dla\_1x](../experiments/ctdet_coco_dla_1x.sh) | 8 | 57 | 19 / 36 / 248 | 36.3 / 38.2 / 40.7 | [model](https://drive.google.com/file/d/1xqFsdA3DANMq1MfnXG8n7nQPe4AfXTHv) |
|[ctdet\_coco\_dla\_2x](../experiments/ctdet_coco_dla_2x.sh) | 8 | 92 | 19 / 36 / 248 | 37.4 / 39.2 / 41.7 | [model](https://drive.google.com/file/d/18Q3fzzAsha_3Qid6mn4jcIFPeOGUaj1d) |
|[ctdet\_coco\_resdcn101](../experiments/ctdet_coco_resdcn101.sh)| 8 | 65 | 22 / 40 / 259 | 34.6 / 36.2 / 39.3 | [model](https://drive.google.com/file/d/1tKkSyzC3iWmM6XTYNJrC4XLCIToDmnHz) |
|[ctdet\_coco\_resdcn18](../experiments/ctdet_coco_resdcn18.sh) | 4 | 28 | 7 / 14 / 81 | 28.1 / 30.0 / 33.2 | [model](https://drive.google.com/file/d/1RtFps3kQAyLjQyzCao7pPDclOBQ64Vyp) |
|[exdet\_coco\_hg](../experiments/exdet_coco_hg.sh) | 5 |215 | 134 / 246/1340 | 35.8 / 39.8 / 42.4 | [model](https://drive.google.com/file/d/1gf9bVWs9htzOaQiAF_mqaa2SCFTvwNvh) |
|[exdet\_coco\_dla](../experiments/exdet_coco_dla.sh) | 8 |133 | 51 / 90 / 481 | 33.0 / 36.5 / 38.5 | [model](https://drive.google.com/file/d/1RtFps3kQAyLjQyzCao7pPDclOBQ64Vyp) |
#### Notes
- All models are trained on COCO train 2017 and evaluated on val 2017.
- We show test time and AP with no augmentation / flip augmentation / multi scale (0.5, 0.75, 1, 1.25, 1.5) augmentation.
- Results on COCO test-dev can be found in the paper or add `--trainval` for `test.py`.
- exdet is our re-implementation of [ExtremeNet](https://github.com/xingyizhou/ExtremeNet). The testing does not include edge aggregation.
- For dla and resnets, `1x` means the training schedule that train 140 epochs with learning rate dropped 10 times at the 90 and 120 epoch (following [SimpleBaseline](https://github.com/Microsoft/human-pose-estimation.pytorch)). `2x` means train 230 epochs with learning rate dropped 10 times at the 180 and 210 epoch. The training schedules are **not** carefully investigated.
- The hourglass trained schedule follows [ExtremeNet](https://github.com/xingyizhou/ExtremeNet): trains 50 epochs (approximately 250000 iterations in batch size 24) and drops learning rate at the 40 epoch.
- Testing time include network forwarding time, decoding time, and nms time (for ExtremeNet).
- We observed up to 0.4 AP performance jitter due to randomness in training.
### Pascal VOC
| Model |GPUs| Train time (h)| Test time (ms) | mAP | Download |
|---------------------------------|----|---------------|----------------|------|-----------|
|[ctdet\_pascal\_dla\_384](../experiments/ctdet_pascal_dla_384.sh) | 1 |15 | 20 | 79.3 | [model](https://drive.google.com/file/d/19J7WoUZKYiSEU7vrhZInz5C2m0-9zmC1) |
|[ctdet\_pascal\_dla\_512](../experiments/ctdet_pascal_dla_512.sh) | 2 |15 | 30 | 80.7 | [model](https://drive.google.com/file/d/1Iqqr_VLtG8T3tUzy-EuMaeuCtFuv6pEU) |
|[ctdet\_pascal\_resdcn18\_384](../experiments/ctdet_pascal_resdcn18_384.sh) | 1 |3 | 7 | 72.6 | [model](https://drive.google.com/file/d/1mpeslWUt0aJqx99_-UcCzJ7GBcD6sCA-) |
|[ctdet\_pascal\_resdcn18\_512](../experiments/ctdet_pascal_resdcn18_512.sh) | 1 |5 | 10 | 75.7 | [model](https://drive.google.com/file/d/1ILnmGrJQiDK2Ib-D7TzBs2fq5bfju-Bd) |
|[ctdet\_pascal\_resdcn101\_384](../experiments/ctdet_pascal_resdcn101_384.sh)| 2 |7 | 22 | 77.1 | [model](https://drive.google.com/file/d/1-Qy4zMhzmBk9fMF0u2s5zNI0Uj9latsk) |
|[ctdet\_pascal\_resdcn101\_512](../experiments/ctdet_pascal_resdcn101_512.sh)| 4 |7 | 33 | 78.7 | [model](https://drive.google.com/file/d/1TiJkPaofzUaEMLi5wr9TIIUY8G2VewCQ) |
#### Notes
- All models are trained on trainval 07+12 and tested on test 2007.
- Flip test is used by default.
- Training schedule: train for 70 epochs with learning rate dropped 10 times at the 45 and 60 epoch.
- We observed up to 1 mAP performance jitter due to randomness in training.
## Human pose estimation
### COCO
| Model | GPUs |Train time(h)| Test time (ms) | AP | Download |
|--------------------------|------|-------------|----------------|-------------|-----------|
|[multi\_pose\_hg_1x](../experiments/multi_pose_hg_1x.sh) | 5 |62 | 151 | 58.7 | [model](https://drive.google.com/file/d/1neg1zfVjRTS45FCdk5hgIV-TxJ0McAew) |
|[multi\_pose\_hg_3x](../experiments/multi_pose_hg_3x.sh) | 5 |188 | 151 | 64.0 | [model](https://drive.google.com/file/d/17-YS11iguOQKwPPHUg8LzakLzrgKSLfA) |
|[multi\_pose\_dla_1x](../experiments/multi_pose_dla_1x.sh) | 8 |30 | 44 | 54.7 | [model](https://drive.google.com/file/d/1WKqVdkIew43SxLTSxi81a-_f4Q9v2Fx5) |
|[multi\_pose\_dla_3x](../experiments/multi_pose_dla_3x.sh) | 8 |70 | 44 | 58.9 | [model](https://drive.google.com/file/d/1mC2PAQT_RuHi_9ZMZgkt4rg7BSY2_Lkd) |
#### Notes
- All models are trained on keypoint train 2017 images which contains at least one human with keypoint annotations (64115 images).
- The evaluation is done on COCO keypoint val 2017 (5000 images).
- Flip test is used by default.
- The models are fine-tuned from the corresponding center point detection models.
- Dla training schedule: `1x`: train for 140 epochs with learning rate dropped 10 times at the 90 and 120 epoch.`3x`: train for 320 epochs with learning rate dropped 10 times at the 270 and 300 epoch.
- Hourglass training schedule: `1x`: train for 50 epochs with learning rate dropped 10 times at the 40 epoch.`3x`: train for 150 epochs with learning rate dropped 10 times at the 130 epoch.
## 3D bounding box detection
#### Notes
- The 3dop split is from [3DOP](https://papers.nips.cc/paper/5644-3d-object-proposals-for-accurate-object-class-detection) and the suborn split is from [SubCNN](https://github.com/tanshen/SubCNN).
- No augmentation is used in testing.
- The models are trained for 70 epochs with learning rate dropped at the 45 and 60 epoch.
### KITTI 3DOP split
|Model |GPUs|Train time|Test time|AP-E|AP-M|AP-H|AOS-E|AOS-M|AOS-H|BEV-E|BEV-M|BEV-H| Download |
|------------|----|----------|---------|----|----|----|-----|-----|-----|-----|-----|-----|----------|
|[ddd_3dop](../experiments/ddd_3dop.sh)|2 | 7h | 31ms |96.9|87.8|79.2|93.9 |84.3 |75.7 |34.0 |30.5 |26.8 | [model](https://drive.google.com/file/d/1LrAzVJqlZECVuyr_NJI_4xd88mA1fL5b)|
### KITTI SubCNN split
|Model |GPUs|Train time|Test time|AP-E|AP-M|AP-H|AOS-E|AOS-M|AOS-H|BEV-E|BEV-M|BEV-H| Download |
|------------|----|----------|---------|----|----|----|-----|-----|-----|-----|-----|-----|----------|
|[ddd_sub](../experiments/ddd_sub.sh) |2 | 7h | 31ms |89.6|79.8|70.3|85.7 |75.2 |65.9 |34.9 |27.7 |26.4 | [model](https://drive.google.com/file/d/1ZUsUqrM_yTjxaJuDBxm0WTKXj1snVuvP)|
================================================
FILE: requirements.txt
================================================
opencv-python
Cython
numba
progress
matplotlib
easydict
scipy
================================================
FILE: src/_init_paths.py
================================================
import os.path as osp
import sys
def add_path(path):
if path not in sys.path:
sys.path.insert(0, path)
this_dir = osp.dirname(__file__)
# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, 'lib')
add_path(lib_path)
================================================
FILE: src/demo.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import cv2
from opts import opts
from detectors.detector_factory import detector_factory
image_ext = ['jpg', 'jpeg', 'png', 'webp']
video_ext = ['mp4', 'mov', 'avi', 'mkv']
time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
def demo(opt):
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
opt.debug = max(opt.debug, 1)
Detector = detector_factory[opt.task]
detector = Detector(opt)
if opt.demo == 'webcam' or \
opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext:
cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo)
detector.pause = False
while True:
_, img = cam.read()
cv2.imshow('input', img)
ret = detector.run(img)
time_str = ''
for stat in time_stats:
time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
print(time_str)
if cv2.waitKey(1) == 27:
return # esc to quit
else:
if os.path.isdir(opt.demo):
image_names = []
ls = os.listdir(opt.demo)
for file_name in sorted(ls):
ext = file_name[file_name.rfind('.') + 1:].lower()
if ext in image_ext:
image_names.append(os.path.join(opt.demo, file_name))
else:
image_names = [opt.demo]
for (image_name) in image_names:
ret = detector.run(image_name)
time_str = ''
for stat in time_stats:
time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
print(time_str)
if __name__ == '__main__':
opt = opts().init()
demo(opt)
================================================
FILE: src/lib/datasets/dataset/coco.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class COCO(data.Dataset):
num_classes = 80
default_resolution = [512, 512]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(COCO, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'coco')
self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
if split == 'test':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'image_info_test-dev2017.json').format(split)
else:
if opt.task == 'exdet':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'instances_extreme_{}2017.json').format(split)
else:
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'instances_{}2017.json').format(split)
self.max_objs = 128
self.class_name = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush']
self._valid_ids = [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90]
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) \
for v in range(1, self.num_classes + 1)]
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
# self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
# self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
self.split = split
self.opt = opt
print('==> initializing coco 2017 {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
self.images = self.coco.getImgIds()
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
# import pdb; pdb.set_trace()
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = self._valid_ids[cls_ind - 1]
for bbox in all_bboxes[image_id][cls_ind]:
bbox[2] -= bbox[0]
bbox[3] -= bbox[1]
score = bbox[4]
bbox_out = list(map(self._to_float, bbox[0:4]))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score))
}
if len(bbox) > 5:
extreme_points = list(map(self._to_float, bbox[5:13]))
detection["extreme_points"] = extreme_points
detections.append(detection)
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
================================================
FILE: src/lib/datasets/dataset/coco_hp.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class COCOHP(data.Dataset):
num_classes = 1
num_joints = 17
default_resolution = [512, 512]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
[11, 12], [13, 14], [15, 16]]
def __init__(self, opt, split):
super(COCOHP, self).__init__()
self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
[4, 6], [3, 5], [5, 6],
[5, 7], [7, 9], [6, 8], [8, 10],
[6, 12], [5, 11], [11, 12],
[12, 14], [14, 16], [11, 13], [13, 15]]
self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
self.data_dir = os.path.join(opt.data_dir, 'coco')
self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
if split == 'test':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'image_info_test-dev2017.json').format(split)
else:
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'person_keypoints_{}2017.json').format(split)
self.max_objs = 32
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing coco 2017 {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
image_ids = self.coco.getImgIds()
if split == 'train':
self.images = []
for img_id in image_ids:
idxs = self.coco.getAnnIds(imgIds=[img_id])
if len(idxs) > 0:
self.images.append(img_id)
else:
self.images = image_ids
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
# import pdb; pdb.set_trace()
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = 1
for dets in all_bboxes[image_id][cls_ind]:
bbox = dets[:4]
bbox[2] -= bbox[0]
bbox[3] -= bbox[1]
score = dets[4]
bbox_out = list(map(self._to_float, bbox))
keypoints = np.concatenate([
np.array(dets[5:39], dtype=np.float32).reshape(-1, 2),
np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist()
keypoints = list(map(self._to_float, keypoints))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score)),
"keypoints": keypoints
}
detections.append(detection)
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(opt.save_dir, "results.json")
# detections = convert_eval_format(all_boxes)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
================================================
FILE: src/lib/datasets/dataset/kitti.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch.utils.data as data
import pycocotools.coco as coco
import numpy as np
import torch
import json
import cv2
import os
import math
import torch.utils.data as data
class KITTI(data.Dataset):
num_classes = 3
default_resolution = [384, 1280]
mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(KITTI, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'kitti')
self.img_dir = os.path.join(self.data_dir, 'images', 'trainval')
if opt.trainval:
split = 'trainval' if split == 'train' else 'test'
self.img_dir = os.path.join(self.data_dir, 'images', split)
self.annot_path = os.path.join(
self.data_dir, 'annotations', 'kitti_{}.json').format(split)
else:
self.annot_path = os.path.join(self.data_dir,
'annotations', 'kitti_{}_{}.json').format(opt.kitti_split, split)
self.max_objs = 50
self.class_name = [
'__background__', 'Pedestrian', 'Car', 'Cyclist']
self.cat_ids = {1:0, 2:1, 3:2, 4:-3, 5:-3, 6:-2, 7:-99, 8:-99, 9:-1}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
self.alpha_in_degree = False
print('==> initializing kitti {}, {} data.'.format(opt.kitti_split, split))
self.coco = coco.COCO(self.annot_path)
self.images = self.coco.getImgIds()
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def __len__(self):
return self.num_samples
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
pass
def save_results(self, results, save_dir):
results_dir = os.path.join(save_dir, 'results')
if not os.path.exists(results_dir):
os.mkdir(results_dir)
for img_id in results.keys():
out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id))
f = open(out_path, 'w')
for cls_ind in results[img_id]:
for j in range(len(results[img_id][cls_ind])):
class_name = self.class_name[cls_ind]
f.write('{} 0.0 0'.format(class_name))
for i in range(len(results[img_id][cls_ind][j])):
f.write(' {:.2f}'.format(results[img_id][cls_ind][j][i]))
f.write('\n')
f.close()
def run_eval(self, results, save_dir):
self.save_results(results, save_dir)
os.system('./tools/kitti_eval/evaluate_object_3d_offline ' + \
'../data/kitti/training/label_val ' + \
'{}/results/'.format(save_dir))
================================================
FILE: src/lib/datasets/dataset/pascal.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
import numpy as np
import torch
import json
import os
import torch.utils.data as data
class PascalVOC(data.Dataset):
num_classes = 20
default_resolution = [384, 384]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(PascalVOC, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'voc')
self.img_dir = os.path.join(self.data_dir, 'images')
_ann_name = {'train': 'trainval0712', 'val': 'test2007'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'pascal_{}.json').format(_ann_name[split])
self.max_objs = 50
self.class_name = ['__background__', "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
"horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
"train", "tvmonitor"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing pascal {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)] \
for _ in range(self.num_classes + 1)]
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' + \
'{}/results.json'.format(save_dir))
================================================
FILE: src/lib/datasets/dataset_factory.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .sample.ddd import DddDataset
from .sample.exdet import EXDetDataset
from .sample.ctdet import CTDetDataset
from .sample.multi_pose import MultiPoseDataset
from .dataset.coco import COCO
from .dataset.pascal import PascalVOC
from .dataset.kitti import KITTI
from .dataset.coco_hp import COCOHP
dataset_factory = {
'coco': COCO,
'pascal': PascalVOC,
'kitti': KITTI,
'coco_hp': COCOHP
}
_sample_factory = {
'exdet': EXDetDataset,
'ctdet': CTDetDataset,
'ddd': DddDataset,
'multi_pose': MultiPoseDataset
}
def get_dataset(dataset, task):
class Dataset(dataset_factory[dataset], _sample_factory[task]):
pass
return Dataset
================================================
FILE: src/lib/datasets/sample/ctdet.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch.utils.data as data
import numpy as np
import torch
import json
import cv2
import os
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from utils.image import draw_dense_reg
import math
class CTDetDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
img_path = os.path.join(self.img_dir, file_name)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
img = cv2.imread(img_path)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
if self.opt.keep_res:
input_h = (height | self.opt.pad) + 1
input_w = (width | self.opt.pad) + 1
s = np.array([input_w, input_h], dtype=np.float32)
else:
s = max(img.shape[0], img.shape[1]) * 1.0
input_h, input_w = self.opt.input_h, self.opt.input_w
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
w_border = self._get_border(128, img.shape[1])
h_border = self._get_border(128, img.shape[0])
c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
c[0] = width - c[0] - 1
trans_input = get_affine_transform(
c, s, 0, [input_w, input_h])
inp = cv2.warpAffine(img, trans_input,
(input_w, input_h),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_h = input_h // self.opt.down_ratio
output_w = input_w // self.opt.down_ratio
num_classes = self.num_classes
trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(self.cat_ids[ann['category_id']])
if flipped:
bbox[[0, 2]] = width - bbox[[2, 0]] - 1
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if h > 0 and w > 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = max(0, int(radius))
radius = self.opt.hm_gauss if self.opt.mse_loss else radius
ct = np.array(
[(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
ct_int = ct.astype(np.int32)
draw_gaussian(hm[cls_id], ct_int, radius)
wh[k] = 1. * w, 1. * h
ind[k] = ct_int[1] * output_w + ct_int[0]
reg[k] = ct - ct_int
reg_mask[k] = 1
cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
if self.opt.dense_wh:
draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
if self.opt.dense_wh:
hm_a = hm.max(axis=0, keepdims=True)
dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
del ret['wh']
elif self.opt.cat_spec_wh:
ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
del ret['wh']
if self.opt.reg_offset:
ret.update({'reg': reg})
if self.opt.debug > 0 or not self.split == 'train':
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 6), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
ret['meta'] = meta
return ret
================================================
FILE: src/lib/datasets/sample/ddd.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch.utils.data as data
import pycocotools.coco as coco
import numpy as np
import torch
import json
import cv2
import os
import math
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
import pycocotools.coco as coco
class DddDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _convert_alpha(self, alpha):
return math.radians(alpha + 45) if self.alpha_in_degree else alpha
def __getitem__(self, index):
img_id = self.images[index]
img_info = self.coco.loadImgs(ids=[img_id])[0]
img_path = os.path.join(self.img_dir, img_info['file_name'])
img = cv2.imread(img_path)
if 'calib' in img_info:
calib = np.array(img_info['calib'], dtype=np.float32)
else:
calib = self.calib
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
if self.opt.keep_res:
s = np.array([self.opt.input_w, self.opt.input_h], dtype=np.int32)
else:
s = np.array([width, height], dtype=np.int32)
aug = False
if self.split == 'train' and np.random.random() < self.opt.aug_ddd:
aug = True
sf = self.opt.scale
cf = self.opt.shift
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
c[0] += img.shape[1] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += img.shape[0] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
trans_input = get_affine_transform(
c, s, 0, [self.opt.input_w, self.opt.input_h])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_w, self.opt.input_h),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
# if self.split == 'train' and not self.opt.no_color_aug:
# color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
num_classes = self.opt.num_classes
trans_output = get_affine_transform(
c, s, 0, [self.opt.output_w, self.opt.output_h])
hm = np.zeros(
(num_classes, self.opt.output_h, self.opt.output_w), dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
dep = np.zeros((self.max_objs, 1), dtype=np.float32)
rotbin = np.zeros((self.max_objs, 2), dtype=np.int64)
rotres = np.zeros((self.max_objs, 2), dtype=np.float32)
dim = np.zeros((self.max_objs, 3), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
rot_mask = np.zeros((self.max_objs), dtype=np.uint8)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(self.cat_ids[ann['category_id']])
if cls_id <= -99:
continue
# if flipped:
# bbox[[0, 2]] = width - bbox[[2, 0]] - 1
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if h > 0 and w > 0:
radius = gaussian_radius((h, w))
radius = max(0, int(radius))
ct = np.array(
[(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
ct_int = ct.astype(np.int32)
if cls_id < 0:
ignore_id = [_ for _ in range(num_classes)] \
if cls_id == - 1 else [- cls_id - 2]
if self.opt.rect_mask:
hm[ignore_id, int(bbox[1]): int(bbox[3]) + 1,
int(bbox[0]): int(bbox[2]) + 1] = 0.9999
else:
for cc in ignore_id:
draw_gaussian(hm[cc], ct, radius)
hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
continue
draw_gaussian(hm[cls_id], ct, radius)
wh[k] = 1. * w, 1. * h
gt_det.append([ct[0], ct[1], 1] + \
self._alpha_to_8(self._convert_alpha(ann['alpha'])) + \
[ann['depth']] + (np.array(ann['dim']) / 1).tolist() + [cls_id])
if self.opt.reg_bbox:
gt_det[-1] = gt_det[-1][:-1] + [w, h] + [gt_det[-1][-1]]
# if (not self.opt.car_only) or cls_id == 1: # Only estimate ADD for cars !!!
if 1:
alpha = self._convert_alpha(ann['alpha'])
# print('img_id cls_id alpha rot_y', img_path, cls_id, alpha, ann['rotation_y'])
if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
rotbin[k, 0] = 1
rotres[k, 0] = alpha - (-0.5 * np.pi)
if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
rotbin[k, 1] = 1
rotres[k, 1] = alpha - (0.5 * np.pi)
dep[k] = ann['depth']
dim[k] = ann['dim']
# print(' cat dim', cls_id, dim[k])
ind[k] = ct_int[1] * self.opt.output_w + ct_int[0]
reg[k] = ct - ct_int
reg_mask[k] = 1 if not aug else 0
rot_mask[k] = 1
# print('gt_det', gt_det)
# print('')
ret = {'input': inp, 'hm': hm, 'dep': dep, 'dim': dim, 'ind': ind,
'rotbin': rotbin, 'rotres': rotres, 'reg_mask': reg_mask,
'rot_mask': rot_mask}
if self.opt.reg_bbox:
ret.update({'wh': wh})
if self.opt.reg_offset:
ret.update({'reg': reg})
if self.opt.debug > 0 or not ('train' in self.split):
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 18), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'calib': calib,
'image_path': img_path, 'img_id': img_id}
ret['meta'] = meta
return ret
def _alpha_to_8(self, alpha):
# return [alpha, 0, 0, 0, 0, 0, 0, 0]
ret = [0, 0, 0, 1, 0, 0, 0, 1]
if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
r = alpha - (-0.5 * np.pi)
ret[1] = 1
ret[2], ret[3] = np.sin(r), np.cos(r)
if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
r = alpha - (0.5 * np.pi)
ret[5] = 1
ret[6], ret[7] = np.sin(r), np.cos(r)
return ret
================================================
FILE: src/lib/datasets/sample/exdet.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch.utils.data as data
import pycocotools.coco as coco
import numpy as np
import torch
import json
import cv2
import os
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
import pycocotools.coco as coco
import math
class EXDetDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
img_info = self.coco.loadImgs(ids=[img_id])[0]
img_path = os.path.join(self.img_dir, img_info['file_name'])
img = cv2.imread(img_path)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
s = max(img.shape[0], img.shape[1]) * 1.0
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
w_border = self._get_border(128, img.shape[1])
h_border = self._get_border(128, img.shape[0])
c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
c[0] += img.shape[1] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += img.shape[0] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
trans_input = get_affine_transform(
c, s, 0, [self.opt.input_res, self.opt.input_res])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_res, self.opt.input_res),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_res = self.opt.output_res
num_classes = self.opt.num_classes
trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
num_hm = 1 if self.opt.agnostic_ex else num_classes
hm_t = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_l = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_b = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_r = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_c = np.zeros((num_classes, output_res, output_res), dtype=np.float32)
reg_t = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_l = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_b = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_r = np.zeros((self.max_objs, 2), dtype=np.float32)
ind_t = np.zeros((self.max_objs), dtype=np.int64)
ind_l = np.zeros((self.max_objs), dtype=np.int64)
ind_b = np.zeros((self.max_objs), dtype=np.int64)
ind_r = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
for k in range(num_objs):
ann = anns[k]
# bbox = self._coco_box_to_bbox(ann['bbox'])
# tlbr
pts = np.array(ann['extreme_points'], dtype=np.float32).reshape(4, 2)
# cls_id = int(self.cat_ids[ann['category_id']] - 1) # bug
cls_id = int(self.cat_ids[ann['category_id']])
hm_id = 0 if self.opt.agnostic_ex else cls_id
if flipped:
pts[:, 0] = width - pts[:, 0] - 1
pts[1], pts[3] = pts[3].copy(), pts[1].copy()
for j in range(4):
pts[j] = affine_transform(pts[j], trans_output)
pts = np.clip(pts, 0, self.opt.output_res - 1)
h, w = pts[2, 1] - pts[0, 1], pts[3, 0] - pts[1, 0]
if h > 0 and w > 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = max(0, int(radius))
pt_int = pts.astype(np.int32)
draw_gaussian(hm_t[hm_id], pt_int[0], radius)
draw_gaussian(hm_l[hm_id], pt_int[1], radius)
draw_gaussian(hm_b[hm_id], pt_int[2], radius)
draw_gaussian(hm_r[hm_id], pt_int[3], radius)
reg_t[k] = pts[0] - pt_int[0]
reg_l[k] = pts[1] - pt_int[1]
reg_b[k] = pts[2] - pt_int[2]
reg_r[k] = pts[3] - pt_int[3]
ind_t[k] = pt_int[0, 1] * output_res + pt_int[0, 0]
ind_l[k] = pt_int[1, 1] * output_res + pt_int[1, 0]
ind_b[k] = pt_int[2, 1] * output_res + pt_int[2, 0]
ind_r[k] = pt_int[3, 1] * output_res + pt_int[3, 0]
ct = [int((pts[3, 0] + pts[1, 0]) / 2), int((pts[0, 1] + pts[2, 1]) / 2)]
draw_gaussian(hm_c[cls_id], ct, radius)
reg_mask[k] = 1
ret = {'input': inp, 'hm_t': hm_t, 'hm_l': hm_l, 'hm_b': hm_b,
'hm_r': hm_r, 'hm_c': hm_c}
if self.opt.reg_offset:
ret.update({'reg_mask': reg_mask,
'reg_t': reg_t, 'reg_l': reg_l, 'reg_b': reg_b, 'reg_r': reg_r,
'ind_t': ind_t, 'ind_l': ind_l, 'ind_b': ind_b, 'ind_r': ind_r})
return ret
================================================
FILE: src/lib/datasets/sample/multi_pose.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch.utils.data as data
import numpy as np
import torch
import json
import cv2
import os
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from utils.image import draw_dense_reg
import math
class MultiPoseDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
img_path = os.path.join(self.img_dir, file_name)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
img = cv2.imread(img_path)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
s = max(img.shape[0], img.shape[1]) * 1.0
rot = 0
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
w_border = self._get_border(128, img.shape[1])
h_border = self._get_border(128, img.shape[0])
c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
if np.random.random() < self.opt.aug_rot:
rf = self.opt.rotate
rot = np.clip(np.random.randn()*rf, -rf*2, rf*2)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
c[0] = width - c[0] - 1
trans_input = get_affine_transform(
c, s, rot, [self.opt.input_res, self.opt.input_res])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_res, self.opt.input_res),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_res = self.opt.output_res
num_joints = self.num_joints
trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])
trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32)
hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
dense_kps = np.zeros((num_joints, 2, output_res, output_res),
dtype=np.float32)
dense_kps_mask = np.zeros((num_joints, output_res, output_res),
dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(ann['category_id']) - 1
pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
if flipped:
bbox[[0, 2]] = width - bbox[[2, 0]] - 1
pts[:, 0] = width - pts[:, 0] - 1
for e in self.flip_idx:
pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox = np.clip(bbox, 0, output_res - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if (h > 0 and w > 0) or (rot != 0):
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius))
ct = np.array(
[(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
ct_int = ct.astype(np.int32)
wh[k] = 1. * w, 1. * h
ind[k] = ct_int[1] * output_res + ct_int[0]
reg[k] = ct - ct_int
reg_mask[k] = 1
num_kpts = pts[:, 2].sum()
if num_kpts == 0:
hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
reg_mask[k] = 0
hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
hp_radius = self.opt.hm_gauss \
if self.opt.mse_loss else max(0, int(hp_radius))
for j in range(num_joints):
if pts[j, 2] > 0:
pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
pts[j, 1] >= 0 and pts[j, 1] < output_res:
kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int
kps_mask[k, j * 2: j * 2 + 2] = 1
pt_int = pts[j, :2].astype(np.int32)
hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]
hp_mask[k * num_joints + j] = 1
if self.opt.dense_hp:
# must be before draw center hm gaussian
draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
pts[j, :2] - ct_int, radius, is_offset=True)
draw_gaussian(dense_kps_mask[j], ct_int, radius)
draw_gaussian(hm_hp[j], pt_int, hp_radius)
draw_gaussian(hm[cls_id], ct_int, radius)
gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
ct[0] + w / 2, ct[1] + h / 2, 1] +
pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
if rot != 0:
hm = hm * 0 + 0.9999
reg_mask *= 0
kps_mask *= 0
ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
'hps': kps, 'hps_mask': kps_mask}
if self.opt.dense_hp:
dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
dense_kps_mask = dense_kps_mask.reshape(
num_joints, 1, output_res, output_res)
dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1)
dense_kps_mask = dense_kps_mask.reshape(
num_joints * 2, output_res, output_res)
ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask})
del ret['hps'], ret['hps_mask']
if self.opt.reg_offset:
ret.update({'reg': reg})
if self.opt.hm_hp:
ret.update({'hm_hp': hm_hp})
if self.opt.reg_hp_offset:
ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask})
if self.opt.debug > 0 or not self.split == 'train':
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 40), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
ret['meta'] = meta
return ret
================================================
FILE: src/lib/detectors/base_detector.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
from progress.bar import Bar
import time
import torch
from models.model import create_model, load_model
from utils.image import get_affine_transform
from utils.debugger import Debugger
class BaseDetector(object):
def __init__(self, opt):
if opt.gpus[0] >= 0:
opt.device = torch.device('cuda')
else:
opt.device = torch.device('cpu')
print('Creating model...')
self.model = create_model(opt.arch, opt.heads, opt.head_conv)
self.model = load_model(self.model, opt.load_model)
self.model = self.model.to(opt.device)
self.model.eval()
self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3)
self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3)
self.max_per_image = 100
self.num_classes = opt.num_classes
self.scales = opt.test_scales
self.opt = opt
self.pause = True
def pre_process(self, image, scale, meta=None):
height, width = image.shape[0:2]
new_height = int(height * scale)
new_width = int(width * scale)
if self.opt.fix_res:
inp_height, inp_width = self.opt.input_h, self.opt.input_w
c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
s = max(height, width) * 1.0
else:
inp_height = (new_height | self.opt.pad) + 1
inp_width = (new_width | self.opt.pad) + 1
c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
s = np.array([inp_width, inp_height], dtype=np.float32)
trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
resized_image = cv2.resize(image, (new_width, new_height))
inp_image = cv2.warpAffine(
resized_image, trans_input, (inp_width, inp_height),
flags=cv2.INTER_LINEAR)
inp_image = ((inp_image / 255. - self.mean) / self.std).astype(np.float32)
images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width)
if self.opt.flip_test:
images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
images = torch.from_numpy(images)
meta = {'c': c, 's': s,
'out_height': inp_height // self.opt.down_ratio,
'out_width': inp_width // self.opt.down_ratio}
return images, meta
def process(self, images, return_time=False):
raise NotImplementedError
def post_process(self, dets, meta, scale=1):
raise NotImplementedError
def merge_outputs(self, detections):
raise NotImplementedError
def debug(self, debugger, images, dets, output, scale=1):
raise NotImplementedError
def show_results(self, debugger, image, results):
raise NotImplementedError
def run(self, image_or_path_or_tensor, meta=None):
load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
merge_time, tot_time = 0, 0
debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3),
theme=self.opt.debugger_theme)
start_time = time.time()
pre_processed = False
if isinstance(image_or_path_or_tensor, np.ndarray):
image = image_or_path_or_tensor
elif type(image_or_path_or_tensor) == type (''):
image = cv2.imread(image_or_path_or_tensor)
else:
image = image_or_path_or_tensor['image'][0].numpy()
pre_processed_images = image_or_path_or_tensor
pre_processed = True
loaded_time = time.time()
load_time += (loaded_time - start_time)
detections = []
for scale in self.scales:
scale_start_time = time.time()
if not pre_processed:
images, meta = self.pre_process(image, scale, meta)
else:
# import pdb; pdb.set_trace()
images = pre_processed_images['images'][scale][0]
meta = pre_processed_images['meta'][scale]
meta = {k: v.numpy()[0] for k, v in meta.items()}
images = images.to(self.opt.device)
torch.cuda.synchronize()
pre_process_time = time.time()
pre_time += pre_process_time - scale_start_time
output, dets, forward_time = self.process(images, return_time=True)
torch.cuda.synchronize()
net_time += forward_time - pre_process_time
decode_time = time.time()
dec_time += decode_time - forward_time
if self.opt.debug >= 2:
self.debug(debugger, images, dets, output, scale)
dets = self.post_process(dets, meta, scale)
torch.cuda.synchronize()
post_process_time = time.time()
post_time += post_process_time - decode_time
detections.append(dets)
results = self.merge_outputs(detections)
torch.cuda.synchronize()
end_time = time.time()
merge_time += end_time - post_process_time
tot_time += end_time - start_time
if self.opt.debug >= 1:
self.show_results(debugger, image, results)
return {'results': results, 'tot': tot_time, 'load': load_time,
'pre': pre_time, 'net': net_time, 'dec': dec_time,
'post': post_time, 'merge': merge_time}
================================================
FILE: src/lib/detectors/ctdet.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
from progress.bar import Bar
import time
import torch
try:
from external.nms import soft_nms
except:
print('NMS not imported! If you need it,'
' do \n cd $CenterNet_ROOT/src/lib/external \n make')
from models.decode import ctdet_decode
from models.utils import flip_tensor
from utils.image import get_affine_transform
from utils.post_process import ctdet_post_process
from utils.debugger import Debugger
from .base_detector import BaseDetector
class CtdetDetector(BaseDetector):
def __init__(self, opt):
super(CtdetDetector, self).__init__(opt)
def process(self, images, return_time=False):
with torch.no_grad():
output = self.model(images)[-1]
hm = output['hm'].sigmoid_()
wh = output['wh']
reg = output['reg'] if self.opt.reg_offset else None
if self.opt.flip_test:
hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
reg = reg[0:1] if reg is not None else None
torch.cuda.synchronize()
forward_time = time.time()
dets = ctdet_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
if return_time:
return output, dets, forward_time
else:
return output, dets
def post_process(self, dets, meta, scale=1):
dets = dets.detach().cpu().numpy()
dets = dets.reshape(1, -1, dets.shape[2])
dets = ctdet_post_process(
dets.copy(), [meta['c']], [meta['s']],
meta['out_height'], meta['out_width'], self.opt.num_classes)
for j in range(1, self.num_classes + 1):
dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5)
dets[0][j][:, :4] /= scale
return dets[0]
def merge_outputs(self, detections):
results = {}
for j in range(1, self.num_classes + 1):
results[j] = np.concatenate(
[detection[j] for detection in detections], axis=0).astype(np.float32)
if len(self.scales) > 1 or self.opt.nms:
soft_nms(results[j], Nt=0.5, method=2)
scores = np.hstack(
[results[j][:, 4] for j in range(1, self.num_classes + 1)])
if len(scores) > self.max_per_image:
kth = len(scores) - self.max_per_image
thresh = np.partition(scores, kth)[kth]
for j in range(1, self.num_classes + 1):
keep_inds = (results[j][:, 4] >= thresh)
results[j] = results[j][keep_inds]
return results
def debug(self, debugger, images, dets, output, scale=1):
detection = dets.detach().cpu().numpy().copy()
detection[:, :, :4] *= self.opt.down_ratio
for i in range(1):
img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.std + self.mean) * 255).astype(np.uint8)
pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale))
debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale))
for k in range(len(dets[i])):
if detection[i, k, 4] > self.opt.center_thresh:
debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
detection[i, k, 4],
img_id='out_pred_{:.1f}'.format(scale))
def show_results(self, debugger, image, results):
debugger.add_img(image, img_id='ctdet')
for j in range(1, self.num_classes + 1):
for bbox in results[j]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='ctdet')
debugger.show_all_imgs(pause=self.pause)
================================================
FILE: src/lib/detectors/ddd.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
from progress.bar import Bar
import time
import torch
from models.decode import ddd_decode
from models.utils import flip_tensor
from utils.image import get_affine_transform
from utils.post_process import ddd_post_process
from utils.debugger import Debugger
from utils.ddd_utils import compute_box_3d, project_to_image, alpha2rot_y
from utils.ddd_utils import draw_box_3d, unproject_2d_to_3d
from .base_detector import BaseDetector
class DddDetector(BaseDetector):
def __init__(self, opt):
super(DddDetector, self).__init__(opt)
self.calib = np.array([[707.0493, 0, 604.0814, 45.75831],
[0, 707.0493, 180.5066, -0.3454157],
[0, 0, 1., 0.004981016]], dtype=np.float32)
def pre_process(self, image, scale, calib=None):
height, width = image.shape[0:2]
inp_height, inp_width = self.opt.input_h, self.opt.input_w
c = np.array([width / 2, height / 2], dtype=np.float32)
if self.opt.keep_res:
s = np.array([inp_width, inp_height], dtype=np.int32)
else:
s = np.array([width, height], dtype=np.int32)
trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
resized_image = image #cv2.resize(image, (width, height))
inp_image = cv2.warpAffine(
resized_image, trans_input, (inp_width, inp_height),
flags=cv2.INTER_LINEAR)
inp_image = (inp_image.astype(np.float32) / 255.)
inp_image = (inp_image - self.mean) / self.std
images = inp_image.transpose(2, 0, 1)[np.newaxis, ...]
calib = np.array(calib, dtype=np.float32) if calib is not None \
else self.calib
images = torch.from_numpy(images)
meta = {'c': c, 's': s,
'out_height': inp_height // self.opt.down_ratio,
'out_width': inp_width // self.opt.down_ratio,
'calib': calib}
return images, meta
def process(self, images, return_time=False):
with torch.no_grad():
torch.cuda.synchronize()
output = self.model(images)[-1]
output['hm'] = output['hm'].sigmoid_()
output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
wh = output['wh'] if self.opt.reg_bbox else None
reg = output['reg'] if self.opt.reg_offset else None
torch.cuda.synchronize()
forward_time = time.time()
dets = ddd_decode(output['hm'], output['rot'], output['dep'],
output['dim'], wh=wh, reg=reg, K=self.opt.K)
if return_time:
return output, dets, forward_time
else:
return output, dets
def post_process(self, dets, meta, scale=1):
dets = dets.detach().cpu().numpy()
detections = ddd_post_process(
dets.copy(), [meta['c']], [meta['s']], [meta['calib']], self.opt)
self.this_calib = meta['calib']
return detections[0]
def merge_outputs(self, detections):
results = detections[0]
for j in range(1, self.num_classes + 1):
if len(results[j] > 0):
keep_inds = (results[j][:, -1] > self.opt.peak_thresh)
results[j] = results[j][keep_inds]
return results
def debug(self, debugger, images, dets, output, scale=1):
dets = dets.detach().cpu().numpy()
img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.std + self.mean) * 255).astype(np.uint8)
pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm')
debugger.add_ct_detection(
img, dets[0], show_box=self.opt.reg_bbox,
center_thresh=self.opt.vis_thresh, img_id='det_pred')
def show_results(self, debugger, image, results):
debugger.add_3d_detection(
image, results, self.this_calib,
center_thresh=self.opt.vis_thresh, img_id='add_pred')
debugger.add_bird_view(
results, center_thresh=self.opt.vis_thresh, img_id='bird_pred')
debugger.show_all_imgs(pause=self.pause)
================================================
FILE: src/lib/detectors/detector_factory.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .exdet import ExdetDetector
from .ddd import DddDetector
from .ctdet import CtdetDetector
from .multi_pose import MultiPoseDetector
detector_factory = {
'exdet': ExdetDetector,
'ddd': DddDetector,
'ctdet': CtdetDetector,
'multi_pose': MultiPoseDetector,
}
================================================
FILE: src/lib/detectors/exdet.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import cv2
import numpy as np
from progress.bar import Bar
import time
import torch
from models.decode import exct_decode, agnex_ct_decode
from models.utils import flip_tensor
from utils.image import get_affine_transform, transform_preds
from utils.post_process import ctdet_post_process
from utils.debugger import Debugger
from .base_detector import BaseDetector
class ExdetDetector(BaseDetector):
def __init__(self, opt):
super(ExdetDetector, self).__init__(opt)
self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode
def process(self, images, return_time=False):
with torch.no_grad():
torch.cuda.synchronize()
output = self.model(images)[-1]
t_heat = output['hm_t'].sigmoid_()
l_heat = output['hm_l'].sigmoid_()
b_heat = output['hm_b'].sigmoid_()
r_heat = output['hm_r'].sigmoid_()
c_heat = output['hm_c'].sigmoid_()
torch.cuda.synchronize()
forward_time = time.time()
if self.opt.reg_offset:
dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat,
output['reg_t'], output['reg_l'],
output['reg_b'], output['reg_r'],
K=self.opt.K,
scores_thresh=self.opt.scores_thresh,
center_thresh=self.opt.center_thresh,
aggr_weight=self.opt.aggr_weight)
else:
dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat, K=self.opt.K,
scores_thresh=self.opt.scores_thresh,
center_thresh=self.opt.center_thresh,
aggr_weight=self.opt.aggr_weight)
if return_time:
return output, dets, forward_time
else:
return output, dets
def debug(self, debugger, images, dets, output, scale=1):
detection = dets.detach().cpu().numpy().copy()
detection[:, :, :4] *= self.opt.down_ratio
for i in range(1):
inp_height, inp_width = images.shape[2], images.shape[3]
pred_hm = np.zeros((inp_height, inp_width, 3), dtype=np.uint8)
img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.std + self.mean) * 255).astype(np.uint8)
parts = ['t', 'l', 'b', 'r', 'c']
for p in parts:
tag = 'hm_{}'.format(p)
pred = debugger.gen_colormap(
output[tag][i].detach().cpu().numpy(), (inp_height, inp_width))
if p != 'c':
pred_hm = np.maximum(pred_hm, pred)
else:
debugger.add_blend_img(
img, pred, 'pred_{}_{:.1f}'.format(p, scale))
debugger.add_blend_img(img, pred_hm, 'pred_{:.1f}'.format(scale))
debugger.add_img(img, img_id='out_{:.1f}'.format(scale))
for k in range(len(detection[i])):
# print('detection', detection[i, k, 4], detection[i, k])
if detection[i, k, 4] > 0.01:
# print('detection', detection[i, k, 4], detection[i, k])
debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
detection[i, k, 4],
img_id='out_{:.1f}'.format(scale))
def post_process(self, dets, meta, scale=1):
out_width, out_height = meta['out_width'], meta['out_height']
dets = dets.detach().cpu().numpy().reshape(2, -1, 14)
dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
dets = dets.reshape(1, -1, 14)
dets[0, :, 0:2] = transform_preds(
dets[0, :, 0:2], meta['c'], meta['s'], (out_width, out_height))
dets[0, :, 2:4] = transform_preds(
dets[0, :, 2:4], meta['c'], meta['s'], (out_width, out_height))
dets[:, :, 0:4] /= scale
return dets[0]
def merge_outputs(self, detections):
detections = np.concatenate(
[detection for detection in detections], axis=0).astype(np.float32)
classes = detections[..., -1]
keep_inds = (detections[:, 4] > 0)
detections = detections[keep_inds]
classes = classes[keep_inds]
results = {}
for j in range(self.num_classes):
keep_inds = (classes == j)
results[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
soft_nms(results[j + 1], Nt=0.5, method=2)
results[j + 1] = results[j + 1][:, 0:5]
scores = np.hstack([
results[j][:, -1]
for j in range(1, self.num_classes + 1)
])
if len(scores) > self.max_per_image:
kth = len(scores) - self.max_per_image
thresh = np.partition(scores, kth)[kth]
for j in range(1, self.num_classes + 1):
keep_inds = (results[j][:, -1] >= thresh)
results[j] = results[j][keep_inds]
return results
def show_results(self, debugger, image, results):
debugger.add_img(image, img_id='exdet')
for j in range(1, self.num_classes + 1):
for bbox in results[j]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='exdet')
debugger.show_all_imgs(pause=self.pause)
================================================
FILE: src/lib/detectors/multi_pose.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
from progress.bar import Bar
import time
import torch
try:
from external.nms import soft_nms_39
except:
print('NMS not imported! If you need it,'
' do \n cd $CenterNet_ROOT/src/lib/external \n make')
from models.decode import multi_pose_decode
from models.utils import flip_tensor, flip_lr_off, flip_lr
from utils.image import get_affine_transform
from utils.post_process import multi_pose_post_process
from utils.debugger import Debugger
from .base_detector import BaseDetector
class MultiPoseDetector(BaseDetector):
def __init__(self, opt):
super(MultiPoseDetector, self).__init__(opt)
self.flip_idx = opt.flip_idx
def process(self, images, return_time=False):
with torch.no_grad():
torch.cuda.synchronize()
output = self.model(images)[-1]
output['hm'] = output['hm'].sigmoid_()
if self.opt.hm_hp and not self.opt.mse_loss:
output['hm_hp'] = output['hm_hp'].sigmoid_()
reg = output['reg'] if self.opt.reg_offset else None
hm_hp = output['hm_hp'] if self.opt.hm_hp else None
hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None
torch.cuda.synchronize()
forward_time = time.time()
if self.opt.flip_test:
output['hm'] = (output['hm'][0:1] + flip_tensor(output['hm'][1:2])) / 2
output['wh'] = (output['wh'][0:1] + flip_tensor(output['wh'][1:2])) / 2
output['hps'] = (output['hps'][0:1] +
flip_lr_off(output['hps'][1:2], self.flip_idx)) / 2
hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \
if hm_hp is not None else None
reg = reg[0:1] if reg is not None else None
hp_offset = hp_offset[0:1] if hp_offset is not None else None
dets = multi_pose_decode(
output['hm'], output['wh'], output['hps'],
reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K)
if return_time:
return output, dets, forward_time
else:
return output, dets
def post_process(self, dets, meta, scale=1):
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets = multi_pose_post_process(
dets.copy(), [meta['c']], [meta['s']],
meta['out_height'], meta['out_width'])
for j in range(1, self.num_classes + 1):
dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 39)
# import pdb; pdb.set_trace()
dets[0][j][:, :4] /= scale
dets[0][j][:, 5:] /= scale
return dets[0]
def merge_outputs(self, detections):
results = {}
results[1] = np.concatenate(
[detection[1] for detection in detections], axis=0).astype(np.float32)
if self.opt.nms or len(self.opt.test_scales) > 1:
soft_nms_39(results[1], Nt=0.5, method=2)
results[1] = results[1].tolist()
return results
def debug(self, debugger, images, dets, output, scale=1):
dets = dets.detach().cpu().numpy().copy()
dets[:, :, :4] *= self.opt.down_ratio
dets[:, :, 5:39] *= self.opt.down_ratio
img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
img = np.clip(((
img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm')
if self.opt.hm_hp:
pred = debugger.gen_colormap_hp(
output['hm_hp'][0].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hmhp')
def show_results(self, debugger, image, results):
debugger.add_img(image, img_id='multi_pose')
for bbox in results[1]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(bbox[:4], 0, bbox[4], img_id='multi_pose')
debugger.add_coco_hp(bbox[5:39], img_id='multi_pose')
debugger.show_all_imgs(pause=self.pause)
================================================
FILE: src/lib/external/.gitignore
================================================
bbox.c
bbox.cpython-35m-x86_64-linux-gnu.so
bbox.cpython-36m-x86_64-linux-gnu.so
nms.c
nms.cpython-35m-x86_64-linux-gnu.so
nms.cpython-36m-x86_64-linux-gnu.so
================================================
FILE: src/lib/external/Makefile
================================================
all:
python setup.py build_ext --inplace
rm -rf build
================================================
FILE: src/lib/external/__init__.py
================================================
================================================
FILE: src/lib/external/nms.pyx
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
# ----------------------------------------------------------
# Soft-NMS: Improving Object Detection With One Line of Code
# Copyright (c) University of Maryland, College Park
# Licensed under The MIT License [see LICENSE for details]
# Written by Navaneeth Bodla and Bharat Singh
# ----------------------------------------------------------
import numpy as np
cimport numpy as np
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
return a if a >= b else b
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
return a if a <= b else b
def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
cdef int ndets = dets.shape[0]
cdef np.ndarray[np.int_t, ndim=1] suppressed = \
np.zeros((ndets), dtype=np.int)
# nominal indices
cdef int _i, _j
# sorted indices
cdef int i, j
# temp variables for box i's (the box currently under consideration)
cdef np.float32_t ix1, iy1, ix2, iy2, iarea
# variables for computing overlap with box j (lower scoring box)
cdef np.float32_t xx1, yy1, xx2, yy2
cdef np.float32_t w, h
cdef np.float32_t inter, ovr
keep = []
for _i in range(ndets):
i = order[_i]
if suppressed[i] == 1:
continue
keep.append(i)
ix1 = x1[i]
iy1 = y1[i]
ix2 = x2[i]
iy2 = y2[i]
iarea = areas[i]
for _j in range(_i + 1, ndets):
j = order[_j]
if suppressed[j] == 1:
continue
xx1 = max(ix1, x1[j])
yy1 = max(iy1, y1[j])
xx2 = min(ix2, x2[j])
yy2 = min(iy2, y2[j])
w = max(0.0, xx2 - xx1 + 1)
h = max(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (iarea + areas[j] - inter)
if ovr >= thresh:
suppressed[j] = 1
return keep
def soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
cdef unsigned int N = boxes.shape[0]
cdef float iw, ih, box_area
cdef float ua
cdef int pos = 0
cdef float maxscore = 0
cdef int maxpos = 0
cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
for i in range(N):
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# get max box
while pos < N:
if maxscore < boxes[pos, 4]:
maxscore = boxes[pos, 4]
maxpos = pos
pos = pos + 1
# add max box as a detection
boxes[i,0] = boxes[maxpos,0]
boxes[i,1] = boxes[maxpos,1]
boxes[i,2] = boxes[maxpos,2]
boxes[i,3] = boxes[maxpos,3]
boxes[i,4] = boxes[maxpos,4]
# swap ith box with position of max box
boxes[maxpos,0] = tx1
boxes[maxpos,1] = ty1
boxes[maxpos,2] = tx2
boxes[maxpos,3] = ty2
boxes[maxpos,4] = ts
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below threshold
while pos < N:
x1 = boxes[pos, 0]
y1 = boxes[pos, 1]
x2 = boxes[pos, 2]
y2 = boxes[pos, 3]
s = boxes[pos, 4]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
iw = (min(tx2, x2) - max(tx1, x1) + 1)
if iw > 0:
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua #iou between max box and detection box
if method == 1: # linear
if ov > Nt:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov)/sigma)
else: # original NMS
if ov > Nt:
weight = 0
else:
weight = 1
boxes[pos, 4] = weight*boxes[pos, 4]
# if box score falls below threshold, discard the box by swapping with last box
# update N
if boxes[pos, 4] < threshold:
boxes[pos,0] = boxes[N-1, 0]
boxes[pos,1] = boxes[N-1, 1]
boxes[pos,2] = boxes[N-1, 2]
boxes[pos,3] = boxes[N-1, 3]
boxes[pos,4] = boxes[N-1, 4]
N = N - 1
pos = pos - 1
pos = pos + 1
keep = [i for i in range(N)]
return keep
def soft_nms_39(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
cdef unsigned int N = boxes.shape[0]
cdef float iw, ih, box_area
cdef float ua
cdef int pos = 0
cdef float maxscore = 0
cdef int maxpos = 0
cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
cdef float tmp
for i in range(N):
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# get max box
while pos < N:
if maxscore < boxes[pos, 4]:
maxscore = boxes[pos, 4]
maxpos = pos
pos = pos + 1
# add max box as a detection
boxes[i,0] = boxes[maxpos,0]
boxes[i,1] = boxes[maxpos,1]
boxes[i,2] = boxes[maxpos,2]
boxes[i,3] = boxes[maxpos,3]
boxes[i,4] = boxes[maxpos,4]
# swap ith box with position of max box
boxes[maxpos,0] = tx1
boxes[maxpos,1] = ty1
boxes[maxpos,2] = tx2
boxes[maxpos,3] = ty2
boxes[maxpos,4] = ts
for j in range(5, 39):
tmp = boxes[i, j]
boxes[i, j] = boxes[maxpos, j]
boxes[maxpos, j] = tmp
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below threshold
while pos < N:
x1 = boxes[pos, 0]
y1 = boxes[pos, 1]
x2 = boxes[pos, 2]
y2 = boxes[pos, 3]
s = boxes[pos, 4]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
iw = (min(tx2, x2) - max(tx1, x1) + 1)
if iw > 0:
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua #iou between max box and detection box
if method == 1: # linear
if ov > Nt:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov)/sigma)
else: # original NMS
if ov > Nt:
weight = 0
else:
weight = 1
boxes[pos, 4] = weight*boxes[pos, 4]
# if box score falls below threshold, discard the box by swapping with last box
# update N
if boxes[pos, 4] < threshold:
boxes[pos,0] = boxes[N-1, 0]
boxes[pos,1] = boxes[N-1, 1]
boxes[pos,2] = boxes[N-1, 2]
boxes[pos,3] = boxes[N-1, 3]
boxes[pos,4] = boxes[N-1, 4]
for j in range(5, 39):
tmp = boxes[pos, j]
boxes[pos, j] = boxes[N - 1, j]
boxes[N - 1, j] = tmp
N = N - 1
pos = pos - 1
pos = pos + 1
keep = [i for i in range(N)]
return keep
def soft_nms_merge(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0, float weight_exp=6):
cdef unsigned int N = boxes.shape[0]
cdef float iw, ih, box_area
cdef float ua
cdef int pos = 0
cdef float maxscore = 0
cdef int maxpos = 0
cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
cdef float mx1,mx2,my1,my2,mts,mbs,mw
for i in range(N):
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# get max box
while pos < N:
if maxscore < boxes[pos, 4]:
maxscore = boxes[pos, 4]
maxpos = pos
pos = pos + 1
# add max box as a detection
boxes[i,0] = boxes[maxpos,0]
boxes[i,1] = boxes[maxpos,1]
boxes[i,2] = boxes[maxpos,2]
boxes[i,3] = boxes[maxpos,3]
boxes[i,4] = boxes[maxpos,4]
mx1 = boxes[i, 0] * boxes[i, 5]
my1 = boxes[i, 1] * boxes[i, 5]
mx2 = boxes[i, 2] * boxes[i, 6]
my2 = boxes[i, 3] * boxes[i, 6]
mts = boxes[i, 5]
mbs = boxes[i, 6]
# swap ith box with position of max box
boxes[maxpos,0] = tx1
boxes[maxpos,1] = ty1
boxes[maxpos,2] = tx2
boxes[maxpos,3] = ty2
boxes[maxpos,4] = ts
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below threshold
while pos < N:
x1 = boxes[pos, 0]
y1 = boxes[pos, 1]
x2 = boxes[pos, 2]
y2 = boxes[pos, 3]
s = boxes[pos, 4]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
iw = (min(tx2, x2) - max(tx1, x1) + 1)
if iw > 0:
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua #iou between max box and detection box
if method == 1: # linear
if ov > Nt:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov)/sigma)
else: # original NMS
if ov > Nt:
weight = 0
else:
weight = 1
mw = (1 - weight) ** weight_exp
mx1 = mx1 + boxes[pos, 0] * boxes[pos, 5] * mw
my1 = my1 + boxes[pos, 1] * boxes[pos, 5] * mw
mx2 = mx2 + boxes[pos, 2] * boxes[pos, 6] * mw
my2 = my2 + boxes[pos, 3] * boxes[pos, 6] * mw
mts = mts + boxes[pos, 5] * mw
mbs = mbs + boxes[pos, 6] * mw
boxes[pos, 4] = weight*boxes[pos, 4]
# if box score falls below threshold, discard the box by swapping with last box
# update N
if boxes[pos, 4] < threshold:
boxes[pos,0] = boxes[N-1, 0]
boxes[pos,1] = boxes[N-1, 1]
boxes[pos,2] = boxes[N-1, 2]
boxes[pos,3] = boxes[N-1, 3]
boxes[pos,4] = boxes[N-1, 4]
N = N - 1
pos = pos - 1
pos = pos + 1
boxes[i, 0] = mx1 / mts
boxes[i, 1] = my1 / mts
boxes[i, 2] = mx2 / mbs
boxes[i, 3] = my2 / mbs
keep = [i for i in range(N)]
return keep
================================================
FILE: src/lib/external/setup.py
================================================
import numpy
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
extensions = [
Extension(
"nms",
["nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"]
)
]
setup(
name="coco",
ext_modules=cythonize(extensions),
include_dirs=[numpy.get_include()]
)
================================================
FILE: src/lib/logger.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
import os
import time
import sys
import torch
USE_TENSORBOARD = True
try:
import tensorboardX
print('Using tensorboardX')
except:
USE_TENSORBOARD = False
class Logger(object):
def __init__(self, opt):
"""Create a summary writer logging to log_dir."""
if not os.path.exists(opt.save_dir):
os.makedirs(opt.save_dir)
if not os.path.exists(opt.debug_dir):
os.makedirs(opt.debug_dir)
time_str = time.strftime('%Y-%m-%d-%H-%M')
args = dict((name, getattr(opt, name)) for name in dir(opt)
if not name.startswith('_'))
file_name = os.path.join(opt.save_dir, 'opt.txt')
with open(file_name, 'wt') as opt_file:
opt_file.write('==> torch version: {}\n'.format(torch.__version__))
opt_file.write('==> cudnn version: {}\n'.format(
torch.backends.cudnn.version()))
opt_file.write('==> Cmd:\n')
opt_file.write(str(sys.argv))
opt_file.write('\n==> Opt:\n')
for k, v in sorted(args.items()):
opt_file.write(' %s: %s\n' % (str(k), str(v)))
log_dir = opt.save_dir + '/logs_{}'.format(time_str)
if USE_TENSORBOARD:
self.writer = tensorboardX.SummaryWriter(log_dir=log_dir)
else:
if not os.path.exists(os.path.dirname(log_dir)):
os.mkdir(os.path.dirname(log_dir))
if not os.path.exists(log_dir):
os.mkdir(log_dir)
self.log = open(log_dir + '/log.txt', 'w')
try:
os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir))
except:
pass
self.start_line = True
def write(self, txt):
if self.start_line:
time_str = time.strftime('%Y-%m-%d-%H-%M')
self.log.write('{}: {}'.format(time_str, txt))
else:
self.log.write(txt)
self.start_line = False
if '\n' in txt:
self.start_line = True
self.log.flush()
def close(self):
self.log.close()
def scalar_summary(self, tag, value, step):
"""Log a scalar variable."""
if USE_TENSORBOARD:
self.writer.add_scalar(tag, value, step)
================================================
FILE: src/lib/models/data_parallel.py
================================================
import torch
from torch.nn.modules import Module
from torch.nn.parallel.scatter_gather import gather
from torch.nn.parallel.replicate import replicate
from torch.nn.parallel.parallel_apply import parallel_apply
from .scatter_gather import scatter_kwargs
class _DataParallel(Module):
r"""Implements data parallelism at the module level.
This container parallelizes the application of the given module by
splitting the input across the specified devices by chunking in the batch
dimension. In the forward pass, the module is replicated on each device,
and each replica handles a portion of the input. During the backwards
pass, gradients from each replica are summed into the original module.
The batch size should be larger than the number of GPUs used. It should
also be an integer multiple of the number of GPUs so that each chunk is the
same size (so that each GPU processes the same number of samples).
See also: :ref:`cuda-nn-dataparallel-instead`
Arbitrary positional and keyword inputs are allowed to be passed into
DataParallel EXCEPT Tensors. All variables will be scattered on dim
specified (default 0). Primitive types will be broadcasted, but all
other types will be a shallow copy and can be corrupted if written to in
the model's forward pass.
Args:
module: module to be parallelized
device_ids: CUDA devices (default: all devices)
output_device: device location of output (default: device_ids[0])
Example::
>>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
>>> output = net(input_var)
"""
# TODO: update notes/cuda.rst when this class handles 8+ GPUs well
def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
super(_DataParallel, self).__init__()
if not torch.cuda.is_available():
self.module = module
self.device_ids = []
return
if device_ids is None:
device_ids = list(range(torch.cuda.device_count()))
if output_device is None:
output_device = device_ids[0]
self.dim = dim
self.module = module
self.device_ids = device_ids
self.chunk_sizes = chunk_sizes
self.output_device = output_device
if len(self.device_ids) == 1:
self.module.cuda(device_ids[0])
def forward(self, *inputs, **kwargs):
if not self.device_ids:
return self.module(*inputs, **kwargs)
inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
if len(self.device_ids) == 1:
return self.module(*inputs[0], **kwargs[0])
replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
outputs = self.parallel_apply(replicas, inputs, kwargs)
return self.gather(outputs, self.output_device)
def replicate(self, module, device_ids):
return replicate(module, device_ids)
def scatter(self, inputs, kwargs, device_ids, chunk_sizes):
return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes)
def parallel_apply(self, replicas, inputs, kwargs):
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
def gather(self, outputs, output_device):
return gather(outputs, output_device, dim=self.dim)
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
r"""Evaluates module(input) in parallel across the GPUs given in device_ids.
This is the functional version of the DataParallel module.
Args:
module: the module to evaluate in parallel
inputs: inputs to the module
device_ids: GPU ids on which to replicate module
output_device: GPU location of the output Use -1 to indicate the CPU.
(default: device_ids[0])
Returns:
a Variable containing the result of module(input) located on
output_device
"""
if not isinstance(inputs, tuple):
inputs = (inputs,)
if device_ids is None:
device_ids = list(range(torch.cuda.device_count()))
if output_device is None:
output_device = device_ids[0]
inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
if len(device_ids) == 1:
return module(*inputs[0], **module_kwargs[0])
used_device_ids = device_ids[:len(inputs)]
replicas = replicate(module, used_device_ids)
outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
return gather(outputs, output_device, dim)
def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
if chunk_sizes is None:
return torch.nn.DataParallel(module, device_ids, output_device, dim)
standard_size = True
for i in range(1, len(chunk_sizes)):
if chunk_sizes[i] != chunk_sizes[0]:
standard_size = False
if standard_size:
return torch.nn.DataParallel(module, device_ids, output_device, dim)
return _DataParallel(module, device_ids, output_device, dim, chunk_sizes)
================================================
FILE: src/lib/models/decode.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
from .utils import _gather_feat, _transpose_and_gather_feat
def _nms(heat, kernel=3):
pad = (kernel - 1) // 2
hmax = nn.functional.max_pool2d(
heat, (kernel, kernel), stride=1, padding=pad)
keep = (hmax == heat).float()
return heat * keep
def _left_aggregate(heat):
'''
heat: batchsize x channels x h x w
'''
shape = heat.shape
heat = heat.reshape(-1, heat.shape[3])
heat = heat.transpose(1, 0).contiguous()
ret = heat.clone()
for i in range(1, heat.shape[0]):
inds = (heat[i] >= heat[i - 1])
ret[i] += ret[i - 1] * inds.float()
return (ret - heat).transpose(1, 0).reshape(shape)
def _right_aggregate(heat):
'''
heat: batchsize x channels x h x w
'''
shape = heat.shape
heat = heat.reshape(-1, heat.shape[3])
heat = heat.transpose(1, 0).contiguous()
ret = heat.clone()
for i in range(heat.shape[0] - 2, -1, -1):
inds = (heat[i] >= heat[i +1])
ret[i] += ret[i + 1] * inds.float()
return (ret - heat).transpose(1, 0).reshape(shape)
def _top_aggregate(heat):
'''
heat: batchsize x channels x h x w
'''
heat = heat.transpose(3, 2)
shape = heat.shape
heat = heat.reshape(-1, heat.shape[3])
heat = heat.transpose(1, 0).contiguous()
ret = heat.clone()
for i in range(1, heat.shape[0]):
inds = (heat[i] >= heat[i - 1])
ret[i] += ret[i - 1] * inds.float()
return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2)
def _bottom_aggregate(heat):
'''
heat: batchsize x channels x h x w
'''
heat = heat.transpose(3, 2)
shape = heat.shape
heat = heat.reshape(-1, heat.shape[3])
heat = heat.transpose(1, 0).contiguous()
ret = heat.clone()
for i in range(heat.shape[0] - 2, -1, -1):
inds = (heat[i] >= heat[i + 1])
ret[i] += ret[i + 1] * inds.float()
return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2)
def _h_aggregate(heat, aggr_weight=0.1):
return aggr_weight * _left_aggregate(heat) + \
aggr_weight * _right_aggregate(heat) + heat
def _v_aggregate(heat, aggr_weight=0.1):
return aggr_weight * _top_aggregate(heat) + \
aggr_weight * _bottom_aggregate(heat) + heat
'''
# Slow for large number of categories
def _topk(scores, K=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K)
topk_clses = (topk_inds / (height * width)).int()
topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds / width).int().float()
topk_xs = (topk_inds % width).int().float()
return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs
'''
def _topk_channel(scores, K=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds / width).int().float()
topk_xs = (topk_inds % width).int().float()
return topk_scores, topk_inds, topk_ys, topk_xs
def _topk(scores, K=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds / width).int().float()
topk_xs = (topk_inds % width).int().float()
topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
topk_clses = (topk_ind / K).int()
topk_inds = _gather_feat(
topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def agnex_ct_decode(
t_heat, l_heat, b_heat, r_heat, ct_heat,
t_regr=None, l_regr=None, b_regr=None, r_regr=None,
K=40, scores_thresh=0.1, center_thresh=0.1, aggr_weight=0.0, num_dets=1000
):
batch, cat, height, width = t_heat.size()
'''
t_heat = torch.sigmoid(t_heat)
l_heat = torch.sigmoid(l_heat)
b_heat = torch.sigmoid(b_heat)
r_heat = torch.sigmoid(r_heat)
ct_heat = torch.sigmoid(ct_heat)
'''
if aggr_weight > 0:
t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)
l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
# perform nms on heatmaps
t_heat = _nms(t_heat)
l_heat = _nms(l_heat)
b_heat = _nms(b_heat)
r_heat = _nms(r_heat)
t_heat[t_heat > 1] = 1
l_heat[l_heat > 1] = 1
b_heat[b_heat > 1] = 1
r_heat[r_heat > 1] = 1
t_scores, t_inds, _, t_ys, t_xs = _topk(t_heat, K=K)
l_scores, l_inds, _, l_ys, l_xs = _topk(l_heat, K=K)
b_scores, b_inds, _, b_ys, b_xs = _topk(b_heat, K=K)
r_scores, r_inds, _, r_ys, r_xs = _topk(r_heat, K=K)
ct_heat_agn, ct_clses = torch.max(ct_heat, dim=1, keepdim=True)
# import pdb; pdb.set_trace()
t_ys = t_ys.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
t_xs = t_xs.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_ys = l_ys.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
l_xs = l_xs.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_ys = b_ys.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
b_xs = b_xs.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_ys = r_ys.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
r_xs = r_xs.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
box_ct_xs = ((l_xs + r_xs + 0.5) / 2).long()
box_ct_ys = ((t_ys + b_ys + 0.5) / 2).long()
ct_inds = box_ct_ys * width + box_ct_xs
ct_inds = ct_inds.view(batch, -1)
ct_heat_agn = ct_heat_agn.view(batch, -1, 1)
ct_clses = ct_clses.view(batch, -1, 1)
ct_scores = _gather_feat(ct_heat_agn, ct_inds)
clses = _gather_feat(ct_clses, ct_inds)
t_scores = t_scores.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_scores = l_scores.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_scores = b_scores.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_scores = r_scores.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
ct_scores = ct_scores.view(batch, K, K, K, K)
scores = (t_scores + l_scores + b_scores + r_scores + 2 * ct_scores) / 6
# reject boxes based on classes
top_inds = (t_ys > l_ys) + (t_ys > b_ys) + (t_ys > r_ys)
top_inds = (top_inds > 0)
left_inds = (l_xs > t_xs) + (l_xs > b_xs) + (l_xs > r_xs)
left_inds = (left_inds > 0)
bottom_inds = (b_ys < t_ys) + (b_ys < l_ys) + (b_ys < r_ys)
bottom_inds = (bottom_inds > 0)
right_inds = (r_xs < t_xs) + (r_xs < l_xs) + (r_xs < b_xs)
right_inds = (right_inds > 0)
sc_inds = (t_scores < scores_thresh) + (l_scores < scores_thresh) + \
(b_scores < scores_thresh) + (r_scores < scores_thresh) + \
(ct_scores < center_thresh)
sc_inds = (sc_inds > 0)
scores = scores - sc_inds.float()
scores = scores - top_inds.float()
scores = scores - left_inds.float()
scores = scores - bottom_inds.float()
scores = scores - right_inds.float()
scores = scores.view(batch, -1)
scores, inds = torch.topk(scores, num_dets)
scores = scores.unsqueeze(2)
if t_regr is not None and l_regr is not None \
and b_regr is not None and r_regr is not None:
t_regr = _transpose_and_gather_feat(t_regr, t_inds)
t_regr = t_regr.view(batch, K, 1, 1, 1, 2)
l_regr = _transpose_and_gather_feat(l_regr, l_inds)
l_regr = l_regr.view(batch, 1, K, 1, 1, 2)
b_regr = _transpose_and_gather_feat(b_regr, b_inds)
b_regr = b_regr.view(batch, 1, 1, K, 1, 2)
r_regr = _transpose_and_gather_feat(r_regr, r_inds)
r_regr = r_regr.view(batch, 1, 1, 1, K, 2)
t_xs = t_xs + t_regr[..., 0]
t_ys = t_ys + t_regr[..., 1]
l_xs = l_xs + l_regr[..., 0]
l_ys = l_ys + l_regr[..., 1]
b_xs = b_xs + b_regr[..., 0]
b_ys = b_ys + b_regr[..., 1]
r_xs = r_xs + r_regr[..., 0]
r_ys = r_ys + r_regr[..., 1]
else:
t_xs = t_xs + 0.5
t_ys = t_ys + 0.5
l_xs = l_xs + 0.5
l_ys = l_ys + 0.5
b_xs = b_xs + 0.5
b_ys = b_ys + 0.5
r_xs = r_xs + 0.5
r_ys = r_ys + 0.5
bboxes = torch.stack((l_xs, t_ys, r_xs, b_ys), dim=5)
bboxes = bboxes.view(batch, -1, 4)
bboxes = _gather_feat(bboxes, inds)
clses = clses.contiguous().view(batch, -1, 1)
clses = _gather_feat(clses, inds).float()
t_xs = t_xs.contiguous().view(batch, -1, 1)
t_xs = _gather_feat(t_xs, inds).float()
t_ys = t_ys.contiguous().view(batch, -1, 1)
t_ys = _gather_feat(t_ys, inds).float()
l_xs = l_xs.contiguous().view(batch, -1, 1)
l_xs = _gather_feat(l_xs, inds).float()
l_ys = l_ys.contiguous().view(batch, -1, 1)
l_ys = _gather_feat(l_ys, inds).float()
b_xs = b_xs.contiguous().view(batch, -1, 1)
b_xs = _gather_feat(b_xs, inds).float()
b_ys = b_ys.contiguous().view(batch, -1, 1)
b_ys = _gather_feat(b_ys, inds).float()
r_xs = r_xs.contiguous().view(batch, -1, 1)
r_xs = _gather_feat(r_xs, inds).float()
r_ys = r_ys.contiguous().view(batch, -1, 1)
r_ys = _gather_feat(r_ys, inds).float()
detections = torch.cat([bboxes, scores, t_xs, t_ys, l_xs, l_ys,
b_xs, b_ys, r_xs, r_ys, clses], dim=2)
return detections
def exct_decode(
t_heat, l_heat, b_heat, r_heat, ct_heat,
t_regr=None, l_regr=None, b_regr=None, r_regr=None,
K=40, scores_thresh=0.1, center_thresh=0.1, aggr_weight=0.0, num_dets=1000
):
batch, cat, height, width = t_heat.size()
'''
t_heat = torch.sigmoid(t_heat)
l_heat = torch.sigmoid(l_heat)
b_heat = torch.sigmoid(b_heat)
r_heat = torch.sigmoid(r_heat)
ct_heat = torch.sigmoid(ct_heat)
'''
if aggr_weight > 0:
t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)
l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
# perform nms on heatmaps
t_heat = _nms(t_heat)
l_heat = _nms(l_heat)
b_heat = _nms(b_heat)
r_heat = _nms(r_heat)
t_heat[t_heat > 1] = 1
l_heat[l_heat > 1] = 1
b_heat[b_heat > 1] = 1
r_heat[r_heat > 1] = 1
t_scores, t_inds, t_clses, t_ys, t_xs = _topk(t_heat, K=K)
l_scores, l_inds, l_clses, l_ys, l_xs = _topk(l_heat, K=K)
b_scores, b_inds, b_clses, b_ys, b_xs = _topk(b_heat, K=K)
r_scores, r_inds, r_clses, r_ys, r_xs = _topk(r_heat, K=K)
t_ys = t_ys.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
t_xs = t_xs.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_ys = l_ys.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
l_xs = l_xs.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_ys = b_ys.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
b_xs = b_xs.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_ys = r_ys.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
r_xs = r_xs.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
t_clses = t_clses.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_clses = l_clses.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_clses = b_clses.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_clses = r_clses.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
box_ct_xs = ((l_xs + r_xs + 0.5) / 2).long()
box_ct_ys = ((t_ys + b_ys + 0.5) / 2).long()
ct_inds = t_clses.long() * (height * width) + box_ct_ys * width + box_ct_xs
ct_inds = ct_inds.view(batch, -1)
ct_heat = ct_heat.view(batch, -1, 1)
ct_scores = _gather_feat(ct_heat, ct_inds)
t_scores = t_scores.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_scores = l_scores.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_scores = b_scores.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_scores = r_scores.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
ct_scores = ct_scores.view(batch, K, K, K, K)
scores = (t_scores + l_scores + b_scores + r_scores + 2 * ct_scores) / 6
# reject boxes based on classes
cls_inds = (t_clses != l_clses) + (t_clses != b_clses) + \
(t_clses != r_clses)
cls_inds = (cls_inds > 0)
top_inds = (t_ys > l_ys) + (t_ys > b_ys) + (t_ys > r_ys)
top_inds = (top_inds > 0)
left_inds = (l_xs > t_xs) + (l_xs > b_xs) + (l_xs > r_xs)
left_inds = (left_inds > 0)
bottom_inds = (b_ys < t_ys) + (b_ys < l_ys) + (b_ys < r_ys)
bottom_inds = (bottom_inds > 0)
right_inds = (r_xs < t_xs) + (r_xs < l_xs) + (r_xs < b_xs)
right_inds = (right_inds > 0)
sc_inds = (t_scores < scores_thresh) + (l_scores < scores_thresh) + \
(b_scores < scores_thresh) + (r_scores < scores_thresh) + \
(ct_scores < center_thresh)
sc_inds = (sc_inds > 0)
scores = scores - sc_inds.float()
scores = scores - cls_inds.float()
scores = scores - top_inds.float()
scores = scores - left_inds.float()
scores = scores - bottom_inds.float()
scores = scores - right_inds.float()
scores = scores.view(batch, -1)
scores, inds = torch.topk(scores, num_dets)
scores = scores.unsqueeze(2)
if t_regr is not None and l_regr is not None \
and b_regr is not None and r_regr is not None:
t_regr = _transpose_and_gather_feat(t_regr, t_inds)
t_regr = t_regr.view(batch, K, 1, 1, 1, 2)
l_regr = _transpose_and_gather_feat(l_regr, l_inds)
l_regr = l_regr.view(batch, 1, K, 1, 1, 2)
b_regr = _transpose_and_gather_feat(b_regr, b_inds)
b_regr = b_regr.view(batch, 1, 1, K, 1, 2)
r_regr = _transpose_and_gather_feat(r_regr, r_inds)
r_regr = r_regr.view(batch, 1, 1, 1, K, 2)
t_xs = t_xs + t_regr[..., 0]
t_ys = t_ys + t_regr[..., 1]
l_xs = l_xs + l_regr[..., 0]
l_ys = l_ys + l_regr[..., 1]
b_xs = b_xs + b_regr[..., 0]
b_ys = b_ys + b_regr[..., 1]
r_xs = r_xs + r_regr[..., 0]
r_ys = r_ys + r_regr[..., 1]
else:
t_xs = t_xs + 0.5
t_ys = t_ys + 0.5
l_xs = l_xs + 0.5
l_ys = l_ys + 0.5
b_xs = b_xs + 0.5
b_ys = b_ys + 0.5
r_xs = r_xs + 0.5
r_ys = r_ys + 0.5
bboxes = torch.stack((l_xs, t_ys, r_xs, b_ys), dim=5)
bboxes = bboxes.view(batch, -1, 4)
bboxes = _gather_feat(bboxes, inds)
clses = t_clses.contiguous().view(batch, -1, 1)
clses = _gather_feat(clses, inds).float()
t_xs = t_xs.contiguous().view(batch, -1, 1)
t_xs = _gather_feat(t_xs, inds).float()
t_ys = t_ys.contiguous().view(batch, -1, 1)
t_ys = _gather_feat(t_ys, inds).float()
l_xs = l_xs.contiguous().view(batch, -1, 1)
l_xs = _gather_feat(l_xs, inds).float()
l_ys = l_ys.contiguous().view(batch, -1, 1)
l_ys = _gather_feat(l_ys, inds).float()
b_xs = b_xs.contiguous().view(batch, -1, 1)
b_xs = _gather_feat(b_xs, inds).float()
b_ys = b_ys.contiguous().view(batch, -1, 1)
b_ys = _gather_feat(b_ys, inds).float()
r_xs = r_xs.contiguous().view(batch, -1, 1)
r_xs = _gather_feat(r_xs, inds).float()
r_ys = r_ys.contiguous().view(batch, -1, 1)
r_ys = _gather_feat(r_ys, inds).float()
detections = torch.cat([bboxes, scores, t_xs, t_ys, l_xs, l_ys,
b_xs, b_ys, r_xs, r_ys, clses], dim=2)
return detections
def ddd_decode(heat, rot, depth, dim, wh=None, reg=None, K=40):
batch, cat, height, width = heat.size()
# heat = torch.sigmoid(heat)
# perform nms on heatmaps
heat = _nms(heat)
scores, inds, clses, ys, xs = _topk(heat, K=K)
if reg is not None:
reg = _transpose_and_gather_feat(reg, inds)
reg = reg.view(batch, K, 2)
xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
else:
xs = xs.view(batch, K, 1) + 0.5
ys = ys.view(batch, K, 1) + 0.5
rot = _transpose_and_gather_feat(rot, inds)
rot = rot.view(batch, K, 8)
depth = _transpose_and_gather_feat(depth, inds)
depth = depth.view(batch, K, 1)
dim = _transpose_and_gather_feat(dim, inds)
dim = dim.view(batch, K, 3)
clses = clses.view(batch, K, 1).float()
scores = scores.view(batch, K, 1)
xs = xs.view(batch, K, 1)
ys = ys.view(batch, K, 1)
if wh is not None:
wh = _transpose_and_gather_feat(wh, inds)
wh = wh.view(batch, K, 2)
detections = torch.cat(
[xs, ys, scores, rot, depth, dim, wh, clses], dim=2)
else:
detections = torch.cat(
[xs, ys, scores, rot, depth, dim, clses], dim=2)
return detections
def ctdet_decode(heat, wh, reg=None, cat_spec_wh=False, K=100):
batch, cat, height, width = heat.size()
# heat = torch.sigmoid(heat)
# perform nms on heatmaps
heat = _nms(heat)
scores, inds, clses, ys, xs = _topk(heat, K=K)
if reg is not None:
reg = _transpose_and_gather_feat(reg, inds)
reg = reg.view(batch, K, 2)
xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
else:
xs = xs.view(batch, K, 1) + 0.5
ys = ys.view(batch, K, 1) + 0.5
wh = _transpose_and_gather_feat(wh, inds)
if cat_spec_wh:
wh = wh.view(batch, K, cat, 2)
clses_ind = clses.view(batch, K, 1, 1).expand(batch, K, 1, 2).long()
wh = wh.gather(2, clses_ind).view(batch, K, 2)
else:
wh = wh.view(batch, K, 2)
clses = clses.view(batch, K, 1).float()
scores = scores.view(batch, K, 1)
bboxes = torch.cat([xs - wh[..., 0:1] / 2,
ys - wh[..., 1:2] / 2,
xs + wh[..., 0:1] / 2,
ys + wh[..., 1:2] / 2], dim=2)
detections = torch.cat([bboxes, scores, clses], dim=2)
return detections
def multi_pose_decode(
heat, wh, kps, reg=None, hm_hp=None, hp_offset=None, K=100):
batch, cat, height, width = heat.size()
num_joints = kps.shape[1] // 2
# heat = torch.sigmoid(heat)
# perform nms on heatmaps
heat = _nms(heat)
scores, inds, clses, ys, xs = _topk(heat, K=K)
kps = _transpose_and_gather_feat(kps, inds)
kps = kps.view(batch, K, num_joints * 2)
kps[..., ::2] += xs.view(batch, K, 1).expand(batch, K, num_joints)
kps[..., 1::2] += ys.view(batch, K, 1).expand(batch, K, num_joints)
if reg is not None:
reg = _transpose_and_gather_feat(reg, inds)
reg = reg.view(batch, K, 2)
xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
else:
xs = xs.view(batch, K, 1) + 0.5
ys = ys.view(batch, K, 1) + 0.5
wh = _transpose_and_gather_feat(wh, inds)
wh = wh.view(batch, K, 2)
clses = clses.view(batch, K, 1).float()
scores = scores.view(batch, K, 1)
bboxes = torch.cat([xs - wh[..., 0:1] / 2,
ys - wh[..., 1:2] / 2,
xs + wh[..., 0:1] / 2,
ys + wh[..., 1:2] / 2], dim=2)
if hm_hp is not None:
hm_hp = _nms(hm_hp)
thresh = 0.1
kps = kps.view(batch, K, num_joints, 2).permute(
0, 2, 1, 3).contiguous() # b x J x K x 2
reg_kps = kps.unsqueeze(3).expand(batch, num_joints, K, K, 2)
hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp, K=K) # b x J x K
if hp_offset is not None:
hp_offset = _transpose_and_gather_feat(
hp_offset, hm_inds.view(batch, -1))
hp_offset = hp_offset.view(batch, num_joints, K, 2)
hm_xs = hm_xs + hp_offset[:, :, :, 0]
hm_ys = hm_ys + hp_offset[:, :, :, 1]
else:
hm_xs = hm_xs + 0.5
hm_ys = hm_ys + 0.5
mask = (hm_score > thresh).float()
hm_score = (1 - mask) * -1 + mask * hm_score
hm_ys = (1 - mask) * (-10000) + mask * hm_ys
hm_xs = (1 - mask) * (-10000) + mask * hm_xs
hm_kps = torch.stack([hm_xs, hm_ys], dim=-1).unsqueeze(
2).expand(batch, num_joints, K, K, 2)
dist = (((reg_kps - hm_kps) ** 2).sum(dim=4) ** 0.5)
min_dist, min_ind = dist.min(dim=3) # b x J x K
hm_score = hm_score.gather(2, min_ind).unsqueeze(-1) # b x J x K x 1
min_dist = min_dist.unsqueeze(-1)
min_ind = min_ind.view(batch, num_joints, K, 1, 1).expand(
batch, num_joints, K, 1, 2)
hm_kps = hm_kps.gather(3, min_ind)
hm_kps = hm_kps.view(batch, num_joints, K, 2)
l = bboxes[:, :, 0].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
t = bboxes[:, :, 1].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
r = bboxes[:, :, 2].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
b = bboxes[:, :, 3].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \
(hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + \
(hm_score < thresh) + (min_dist > (torch.max(b - t, r - l) * 0.3))
mask = (mask > 0).float().expand(batch, num_joints, K, 2)
kps = (1 - mask) * hm_kps + mask * kps
kps = kps.permute(0, 2, 1, 3).contiguous().view(
batch, K, num_joints * 2)
detections = torch.cat([bboxes, scores, kps, clses], dim=2)
return detections
================================================
FILE: src/lib/models/losses.py
================================================
# ------------------------------------------------------------------------------
# Portions of this code are from
# CornerNet (https://github.com/princeton-vl/CornerNet)
# Copyright (c) 2018, University of Michigan
# Licensed under the BSD 3-Clause License
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
from .utils import _transpose_and_gather_feat
import torch.nn.functional as F
def _slow_neg_loss(pred, gt):
'''focal loss from CornerNet'''
pos_inds = gt.eq(1)
neg_inds = gt.lt(1)
neg_weights = torch.pow(1 - gt[neg_inds], 4)
loss = 0
pos_pred = pred[pos_inds]
neg_pred = pred[neg_inds]
pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights
num_pos = pos_inds.float().sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if pos_pred.nelement() == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
return loss
def _neg_loss(pred, gt):
''' Modified focal loss. Exactly the same as CornerNet.
Runs faster and costs a little bit more memory
Arguments:
pred (batch x c x h x w)
gt_regr (batch x c x h x w)
'''
pos_inds = gt.eq(1).float()
neg_inds = gt.lt(1).float()
neg_weights = torch.pow(1 - gt, 4)
loss = 0
pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
num_pos = pos_inds.float().sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if num_pos == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
return loss
def _not_faster_neg_loss(pred, gt):
pos_inds = gt.eq(1).float()
neg_inds = gt.lt(1).float()
num_pos = pos_inds.float().sum()
neg_weights = torch.pow(1 - gt, 4)
loss = 0
trans_pred = pred * neg_inds + (1 - pred) * pos_inds
weight = neg_weights * neg_inds + pos_inds
all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight
all_loss = all_loss.sum()
if num_pos > 0:
all_loss /= num_pos
loss -= all_loss
return loss
def _slow_reg_loss(regr, gt_regr, mask):
num = mask.float().sum()
mask = mask.unsqueeze(2).expand_as(gt_regr)
regr = regr[mask]
gt_regr = gt_regr[mask]
regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
regr_loss = regr_loss / (num + 1e-4)
return regr_loss
def _reg_loss(regr, gt_regr, mask):
''' L1 regression loss
Arguments:
regr (batch x max_objects x dim)
gt_regr (batch x max_objects x dim)
mask (batch x max_objects)
'''
num = mask.float().sum()
mask = mask.unsqueeze(2).expand_as(gt_regr).float()
regr = regr * mask
gt_regr = gt_regr * mask
regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
regr_loss = regr_loss / (num + 1e-4)
return regr_loss
class FocalLoss(nn.Module):
'''nn.Module warpper for focal loss'''
def __init__(self):
super(FocalLoss, self).__init__()
self.neg_loss = _neg_loss
def forward(self, out, target):
return self.neg_loss(out, target)
class RegLoss(nn.Module):
'''Regression loss for an output tensor
Arguments:
output (batch x dim x h x w)
mask (batch x max_objects)
ind (batch x max_objects)
target (batch x max_objects x dim)
'''
def __init__(self):
super(RegLoss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
loss = _reg_loss(pred, target, mask)
return loss
class RegL1Loss(nn.Module):
def __init__(self):
super(RegL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class NormRegL1Loss(nn.Module):
def __init__(self):
super(NormRegL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
pred = pred / (target + 1e-4)
target = target * 0 + 1
loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class RegWeightedL1Loss(nn.Module):
def __init__(self):
super(RegWeightedL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class L1Loss(nn.Module):
def __init__(self):
super(L1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
return loss
class BinRotLoss(nn.Module):
def __init__(self):
super(BinRotLoss, self).__init__()
def forward(self, output, mask, ind, rotbin, rotres):
pred = _transpose_and_gather_feat(output, ind)
loss = compute_rot_loss(pred, rotbin, rotres, mask)
return loss
def compute_res_loss(output, target):
return F.smooth_l1_loss(output, target, reduction='elementwise_mean')
# TODO: weight
def compute_bin_loss(output, target, mask):
mask = mask.expand_as(output)
output = output * mask.float()
return F.cross_entropy(output, target, reduction='elementwise_mean')
def compute_rot_loss(output, target_bin, target_res, mask):
# output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
# bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
# target_bin: (B, 128, 2) [bin1_cls, bin2_cls]
# target_res: (B, 128, 2) [bin1_res, bin2_res]
# mask: (B, 128, 1)
# import pdb; pdb.set_trace()
output = output.view(-1, 8)
target_bin = target_bin.view(-1, 2)
target_res = target_res.view(-1, 2)
mask = mask.view(-1, 1)
loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask)
loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask)
loss_res = torch.zeros_like(loss_bin1)
if target_bin[:, 0].nonzero().shape[0] > 0:
idx1 = target_bin[:, 0].nonzero()[:, 0]
valid_output1 = torch.index_select(output, 0, idx1.long())
valid_target_res1 = torch.index_select(target_res, 0, idx1.long())
loss_sin1 = compute_res_loss(
valid_output1[:, 2], torch.sin(valid_target_res1[:, 0]))
loss_cos1 = compute_res_loss(
valid_output1[:, 3], torch.cos(valid_target_res1[:, 0]))
loss_res += loss_sin1 + loss_cos1
if target_bin[:, 1].nonzero().shape[0] > 0:
idx2 = target_bin[:, 1].nonzero()[:, 0]
valid_output2 = torch.index_select(output, 0, idx2.long())
valid_target_res2 = torch.index_select(target_res, 0, idx2.long())
loss_sin2 = compute_res_loss(
valid_output2[:, 6], torch.sin(valid_target_res2[:, 1]))
loss_cos2 = compute_res_loss(
valid_output2[:, 7], torch.cos(valid_target_res2[:, 1]))
loss_res += loss_sin2 + loss_cos2
return loss_bin1 + loss_bin2 + loss_res
================================================
FILE: src/lib/models/model.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torchvision.models as models
import torch
import torch.nn as nn
import os
from .networks.msra_resnet import get_pose_net
from .networks.dlav0 import get_pose_net as get_dlav0
from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn
from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn
from .networks.large_hourglass import get_large_hourglass_net
_model_factory = {
'res': get_pose_net, # default Resnet with deconv
'dlav0': get_dlav0, # default DLAup
'dla': get_dla_dcn,
'resdcn': get_pose_net_dcn,
'hourglass': get_large_hourglass_net,
}
def create_model(arch, heads, head_conv):
num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
arch = arch[:arch.find('_')] if '_' in arch else arch
get_model = _model_factory[arch]
model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv)
return model
def load_model(model, model_path, optimizer=None, resume=False,
lr=None, lr_step=None):
start_epoch = 0
checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
state_dict_ = checkpoint['state_dict']
state_dict = {}
# convert data_parallal to model
for k in state_dict_:
if k.startswith('module') and not k.startswith('module_list'):
state_dict[k[7:]] = state_dict_[k]
else:
state_dict[k] = state_dict_[k]
model_state_dict = model.state_dict()
# check loaded parameters and created model parameters
msg = 'If you see this, your model does not fully load the ' + \
'pre-trained weight. Please make sure ' + \
'you have correctly specified --arch xxx ' + \
'or set the correct --num_classes for your own dataset.'
for k in state_dict:
if k in model_state_dict:
if state_dict[k].shape != model_state_dict[k].shape:
print('Skip loading parameter {}, required shape{}, '\
'loaded shape{}. {}'.format(
k, model_state_dict[k].shape, state_dict[k].shape, msg))
state_dict[k] = model_state_dict[k]
else:
print('Drop parameter {}.'.format(k) + msg)
for k in model_state_dict:
if not (k in state_dict):
print('No param {}.'.format(k) + msg)
state_dict[k] = model_state_dict[k]
model.load_state_dict(state_dict, strict=False)
# resume optimizer parameters
if optimizer is not None and resume:
if 'optimizer' in checkpoint:
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch']
start_lr = lr
for step in lr_step:
if start_epoch >= step:
start_lr *= 0.1
for param_group in optimizer.param_groups:
param_group['lr'] = start_lr
print('Resumed optimizer with start lr', start_lr)
else:
print('No optimizer parameters in checkpoint.')
if optimizer is not None:
return model, optimizer, start_epoch
else:
return model
def save_model(path, epoch, model, optimizer=None):
if isinstance(model, torch.nn.DataParallel):
state_dict = model.module.state_dict()
else:
state_dict = model.state_dict()
data = {'epoch': epoch,
'state_dict': state_dict}
if not (optimizer is None):
data['optimizer'] = optimizer.state_dict()
torch.save(data, path)
================================================
FILE: src/lib/models/networks/DCNv2/.gitignore
================================================
.vscode
.idea
*.so
*.o
*pyc
_ext
================================================
FILE: src/lib/models/networks/DCNv2/LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2019, Charles Shang
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: src/lib/models/networks/DCNv2/README.md
================================================
## Deformable Convolutional Networks V2 with Pytorch
### Build
```bash
./make.sh # build
python test.py # run examples and gradient check
```
### An Example
- deformable conv
```python
from dcn_v2 import DCN
input = torch.randn(2, 64, 128, 128).cuda()
# wrap all things (offset and mask) in DCN
dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
output = dcn(input)
print(output.shape)
```
- deformable roi pooling
```python
from dcn_v2 import DCNPooling
input = torch.randn(2, 32, 64, 64).cuda()
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
# mdformable pooling (V2)
# wrap all things (offset and mask) in DCNPooling
dpooling = DCNPooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
dout = dpooling(input, rois)
```
### Known Issues:
- [x] Gradient check w.r.t offset (solved)
- [ ] Backward is not reentrant (minor)
This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
I have ran the gradient check for many times with DOUBLE type. Every tensor **except offset** passes.
However, when I set the offset to 0.5, it passes. I'm still wondering what cause this problem. Is it because some
non-differential points?
Update: all gradient check passes with double precision.
Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
float `<1e-15` for double),
so it may not be a serious problem (?)
Please post an issue or PR if you have any comments.
================================================
FILE: src/lib/models/networks/DCNv2/__init__.py
================================================
================================================
FILE: src/lib/models/networks/DCNv2/build.py
================================================
import os
import torch
from torch.utils.ffi import create_extension
sources = ['src/dcn_v2.c']
headers = ['src/dcn_v2.h']
defines = []
with_cuda = False
extra_objects = []
if torch.cuda.is_available():
print('Including CUDA code.')
sources += ['src/dcn_v2_cuda.c']
headers += ['src/dcn_v2_cuda.h']
defines += [('WITH_CUDA', None)]
extra_objects += ['src/cuda/dcn_v2_im2col_cuda.cu.o']
extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda.cu.o']
with_cuda = True
else:
raise ValueError('CUDA is not available')
extra_compile_args = ['-fopenmp', '-std=c99']
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
sources = [os.path.join(this_file, fname) for fname in sources]
headers = [os.path.join(this_file, fname) for fname in headers]
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
ffi = create_extension(
'_ext.dcn_v2',
headers=headers,
sources=sources,
define_macros=defines,
relative_to=__file__,
with_cuda=with_cuda,
extra_objects=extra_objects,
extra_compile_args=extra_compile_args
)
if __name__ == '__main__':
ffi.build()
================================================
FILE: src/lib/models/networks/DCNv2/build_double.py
================================================
import os
import torch
from torch.utils.ffi import create_extension
sources = ['src/dcn_v2_double.c']
headers = ['src/dcn_v2_double.h']
defines = []
with_cuda = False
extra_objects = []
if torch.cuda.is_available():
print('Including CUDA code.')
sources += ['src/dcn_v2_cuda_double.c']
headers += ['src/dcn_v2_cuda_double.h']
defines += [('WITH_CUDA', None)]
extra_objects += ['src/cuda/dcn_v2_im2col_cuda_double.cu.o']
extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda_double.cu.o']
with_cuda = True
else:
raise ValueError('CUDA is not available')
extra_compile_args = ['-fopenmp', '-std=c99']
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
sources = [os.path.join(this_file, fname) for fname in sources]
headers = [os.path.join(this_file, fname) for fname in headers]
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
ffi = create_extension(
'_ext.dcn_v2_double',
headers=headers,
sources=sources,
define_macros=defines,
relative_to=__file__,
with_cuda=with_cuda,
extra_objects=extra_objects,
extra_compile_args=extra_compile_args
)
if __name__ == '__main__':
ffi.build()
================================================
FILE: src/lib/models/networks/DCNv2/dcn_v2.py
================================================
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import torch
import math
from torch import nn
from torch.nn.modules.utils import _pair
from .dcn_v2_func import DCNv2Function
from .dcn_v2_func import DCNv2PoolingFunction
class DCNv2(nn.Module):
def __init__(self, in_channels, out_channels,
kernel_size, stride, padding, dilation=1, deformable_groups=1):
super(DCNv2, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = stride
self.padding = padding
self.dilation = dilation
self.deformable_groups = deformable_groups
self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size))
self.bias = nn.Parameter(torch.Tensor(out_channels))
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
self.bias.data.zero_()
def forward(self, input, offset, mask):
func = DCNv2Function(self.stride, self.padding, self.dilation, self.deformable_groups)
return func(input, offset, mask, self.weight, self.bias)
class DCN(DCNv2):
def __init__(self, in_channels, out_channels,
kernel_size, stride, padding,
dilation=1, deformable_groups=1):
super(DCN, self).__init__(in_channels, out_channels,
kernel_size, stride, padding, dilation, deformable_groups)
self.conv_offset_mask = nn.Conv2d(self.in_channels,
self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
kernel_size=self.kernel_size,
stride=(self.stride, self.stride),
padding=(self.padding, self.padding),
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset_mask.weight.data.zero_()
self.conv_offset_mask.bias.data.zero_()
def forward(self, input):
out = self.conv_offset_mask(input)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
func = DCNv2Function(self.stride, self.padding, self.dilation, self.deformable_groups)
return func(input, offset, mask, self.weight, self.bias)
class DCNv2Pooling(nn.Module):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
super(DCNv2Pooling, self).__init__()
self.spatial_scale = spatial_scale
self.pooled_size = pooled_size
self.output_dim = output_dim
self.no_trans = no_trans
self.group_size = group_size
self.part_size = pooled_size if part_size is None else part_size
self.sample_per_part = sample_per_part
self.trans_std = trans_std
self.func = DCNv2PoolingFunction(self.spatial_scale,
self.pooled_size,
self.output_dim,
self.no_trans,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
def forward(self, data, rois, offset):
if self.no_trans:
offset = data.new()
return self.func(data, rois, offset)
class DCNPooling(DCNv2Pooling):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0,
deform_fc_dim=1024):
super(DCNPooling, self).__init__(spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size,
part_size,
sample_per_part,
trans_std)
self.deform_fc_dim = deform_fc_dim
if not no_trans:
self.func_offset = DCNv2PoolingFunction(self.spatial_scale,
self.pooled_size,
self.output_dim,
True,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
self.offset_fc = nn.Sequential(
nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_dim, self.deform_fc_dim),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 2)
)
self.offset_fc[4].weight.data.zero_()
self.offset_fc[4].bias.data.zero_()
self.mask_fc = nn.Sequential(
nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 1),
nn.Sigmoid()
)
self.mask_fc[2].weight.data.zero_()
self.mask_fc[2].bias.data.zero_()
def forward(self, data, rois):
if self.no_trans:
offset = data.new()
else:
n = rois.shape[0]
offset = data.new()
x = self.func_offset(data, rois, offset)
offset = self.offset_fc(x.view(n, -1))
offset = offset.view(n, 2, self.pooled_size, self.pooled_size)
mask = self.mask_fc(x.view(n, -1))
mask = mask.view(n, 1, self.pooled_size, self.pooled_size)
feat = self.func(data, rois, offset) * mask
return feat
return self.func(data, rois, offset)
================================================
FILE: src/lib/models/networks/DCNv2/dcn_v2_func.py
================================================
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import torch
from torch.autograd import Function
from ._ext import dcn_v2 as _backend
# from _ext import dcn_v2_double as _backend
class DCNv2Function(Function):
def __init__(self, stride, padding, dilation=1, deformable_groups=1):
super(DCNv2Function, self).__init__()
self.stride = stride
self.padding = padding
self.dilation = dilation
self.deformable_groups = deformable_groups
def forward(self, input, offset, mask, weight, bias):
if not input.is_cuda:
raise NotImplementedError
if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad:
self.save_for_backward(input, offset, mask, weight, bias)
output = input.new(*self._infer_shape(input, weight))
self._bufs = [input.new(), input.new()]
_backend.dcn_v2_cuda_forward(input, weight,
bias, self._bufs[0],
offset, mask,
output, self._bufs[1],
weight.shape[2], weight.shape[3],
self.stride, self.stride,
self.padding, self.padding,
self.dilation, self.dilation,
self.deformable_groups)
return output
def backward(self, grad_output):
if not grad_output.is_cuda:
raise NotImplementedError
input, offset, mask, weight, bias = self.saved_tensors
grad_input = input.new(*input.size()).zero_()
grad_offset = offset.new(*offset.size()).zero_()
grad_mask = mask.new(*mask.size()).zero_()
grad_weight = weight.new(*weight.size()).zero_()
grad_bias = bias.new(*bias.size()).zero_()
_backend.dcn_v2_cuda_backward(input, weight,
bias, self._bufs[0],
offset, mask,
self._bufs[1],
grad_input, grad_weight,
grad_bias, grad_offset,
grad_mask, grad_output,
weight.shape[2], weight.shape[3],
self.stride, self.stride,
self.padding, self.padding,
self.dilation, self.dilation,
self.deformable_groups)
return grad_input, grad_offset, grad_mask, grad_weight, grad_bias
def _infer_shape(self, input, weight):
n = input.size(0)
channels_out = weight.size(0)
height, width = input.shape[2:4]
kernel_h, kernel_w = weight.shape[2:4]
height_out = (height + 2 * self.padding -
(self.dilation * (kernel_h - 1) + 1)) // self.stride + 1
width_out = (width + 2 * self.padding - (self.dilation *
(kernel_w - 1) + 1)) // self.stride + 1
return (n, channels_out, height_out, width_out)
class DCNv2PoolingFunction(Function):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
super(DCNv2PoolingFunction, self).__init__()
self.spatial_scale = spatial_scale
self.pooled_size = pooled_size
self.output_dim = output_dim
self.no_trans = no_trans
self.group_size = group_size
self.part_size = pooled_size if part_size is None else part_size
self.sample_per_part = sample_per_part
self.trans_std = trans_std
assert self.trans_std >= 0.0 and self.trans_std <= 1.0
def forward(self, data, rois, offset):
if not data.is_cuda:
raise NotImplementedError
output = data.new(*self._infer_shape(data, rois))
output_count = data.new(*self._infer_shape(data, rois))
_backend.dcn_v2_psroi_pooling_cuda_forward(data, rois, offset,
output, output_count,
self.no_trans, self.spatial_scale,
self.output_dim, self.group_size,
self.pooled_size, self.part_size,
self.sample_per_part, self.trans_std)
if data.requires_grad or rois.requires_grad or offset.requires_grad:
self.save_for_backward(data, rois, offset, output_count)
return output
def backward(self, grad_output):
if not grad_output.is_cuda:
raise NotImplementedError
data, rois, offset, output_count = self.saved_tensors
grad_input = data.new(*data.size()).zero_()
grad_offset = offset.new(*offset.size()).zero_()
_backend.dcn_v2_psroi_pooling_cuda_backward(grad_output,
data,
rois,
offset,
output_count,
grad_input,
grad_offset,
self.no_trans,
self.spatial_scale,
self.output_dim,
self.group_size,
self.pooled_size,
self.part_size,
self.sample_per_part,
self.trans_std)
return grad_input, None, grad_offset
def _infer_shape(self, data, rois):
# _, c, h, w = data.shape[:4]
c = data.shape[1]
n = rois.shape[0]
return (n, self.output_dim, self.pooled_size, self.pooled_size)
================================================
FILE: src/lib/models/networks/DCNv2/make.sh
================================================
#!/usr/bin/env bash
cd src/cuda
# compile dcn
nvcc -c -o dcn_v2_im2col_cuda.cu.o dcn_v2_im2col_cuda.cu -x cu -Xcompiler -fPIC
nvcc -c -o dcn_v2_im2col_cuda_double.cu.o dcn_v2_im2col_cuda_double.cu -x cu -Xcompiler -fPIC
# compile dcn-roi-pooling
nvcc -c -o dcn_v2_psroi_pooling_cuda.cu.o dcn_v2_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC
nvcc -c -o dcn_v2_psroi_pooling_cuda_double.cu.o dcn_v2_psroi_pooling_cuda_double.cu -x cu -Xcompiler -fPIC
cd -
python build.py
python build_double.py
================================================
FILE: src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda.cu
================================================
#include "dcn_v2_im2col_cuda.h"
#include
#include
#include
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * gridDim.x)
const int CUDA_NUM_THREADS = 1024;
inline int GET_BLOCKS(const int N)
{
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
__device__ float dmcn_im2col_bilinear(const float *bottom_data, const int data_width,
const int height, const int width, float h, float w)
{
int h_low = floor(h);
int w_low = floor(w);
int h_high = h_low + 1;
int w_high = w_low + 1;
float lh = h - h_low;
float lw = w - w_low;
float hh = 1 - lh, hw = 1 - lw;
float v1 = 0;
if (h_low >= 0 && w_low >= 0)
v1 = bottom_data[h_low * data_width + w_low];
float v2 = 0;
if (h_low >= 0 && w_high <= width - 1)
v2 = bottom_data[h_low * data_width + w_high];
float v3 = 0;
if (h_high <= height - 1 && w_low >= 0)
v3 = bottom_data[h_high * data_width + w_low];
float v4 = 0;
if (h_high <= height - 1 && w_high <= width - 1)
v4 = bottom_data[h_high * data_width + w_high];
float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
return val;
}
__device__ float dmcn_get_gradient_weight(float argmax_h, float argmax_w,
const int h, const int w, const int height, const int width)
{
if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
{
//empty
return 0;
}
int argmax_h_low = floor(argmax_h);
int argmax_w_low = floor(argmax_w);
int argmax_h_high = argmax_h_low + 1;
int argmax_w_high = argmax_w_low + 1;
float weight = 0;
if (h == argmax_h_low && w == argmax_w_low)
weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
if (h == argmax_h_low && w == argmax_w_high)
weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
if (h == argmax_h_high && w == argmax_w_low)
weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
if (h == argmax_h_high && w == argmax_w_high)
weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
return weight;
}
__device__ float dmcn_get_coordinate_weight(float argmax_h, float argmax_w,
const int height, const int width, const float *im_data,
const int data_width, const int bp_dir)
{
if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
{
//empty
return 0;
}
int argmax_h_low = floor(argmax_h);
int argmax_w_low = floor(argmax_w);
int argmax_h_high = argmax_h_low + 1;
int argmax_w_high = argmax_w_low + 1;
float weight = 0;
if (bp_dir == 0)
{
if (argmax_h_low >= 0 && argmax_w_low >= 0)
weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
}
else if (bp_dir == 1)
{
if (argmax_h_low >= 0 && argmax_w_low >= 0)
weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
}
return weight;
}
__global__ void modulated_deformable_im2col_gpu_kernel(const int n,
const float *data_im, const float *data_offset, const float *data_mask,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int num_channels, const int deformable_group,
const int height_col, const int width_col,
float *data_col)
{
CUDA_KERNEL_LOOP(index, n)
{
// index index of output matrix
const int w_col = index % width_col;
const int h_col = (index / width_col) % height_col;
const int b_col = (index / width_col / height_col) % batch_size;
const int c_im = (index / width_col / height_col) / batch_size;
const int c_col = c_im * kernel_h * kernel_w;
// compute deformable group index
const int deformable_group_index = c_im / channel_per_deformable_group;
const int h_in = h_col * stride_h - pad_h;
const int w_in = w_col * stride_w - pad_w;
float *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
//const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
const float *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
const float *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const float *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
for (int i = 0; i < kernel_h; ++i)
{
for (int j = 0; j < kernel_w; ++j)
{
const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
const float offset_h = data_offset_ptr[data_offset_h_ptr];
const float offset_w = data_offset_ptr[data_offset_w_ptr];
const float mask = data_mask_ptr[data_mask_hw_ptr];
float val = static_cast(0);
const float h_im = h_in + i * dilation_h + offset_h;
const float w_im = w_in + j * dilation_w + offset_w;
//if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
{
//const float map_h = i * dilation_h + offset_h;
//const float map_w = j * dilation_w + offset_w;
//const int cur_height = height - h_in;
//const int cur_width = width - w_in;
//val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
}
*data_col_ptr = val * mask;
data_col_ptr += batch_size * height_col * width_col;
//data_col_ptr += height_col * width_col;
}
}
}
}
__global__ void modulated_deformable_col2im_gpu_kernel(const int n,
const float *data_col, const float *data_offset, const float *data_mask,
const int channels, const int height, const int width,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int deformable_group,
const int height_col, const int width_col,
float *grad_im)
{
CUDA_KERNEL_LOOP(index, n)
{
const int j = (index / width_col / height_col / batch_size) % kernel_w;
const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
// compute the start and end of the output
const int deformable_group_index = c / channel_per_deformable_group;
int w_out = index % width_col;
int h_out = (index / width_col) % height_col;
int b = (index / width_col / height_col) % batch_size;
int w_in = w_out * stride_w - pad_w;
int h_in = h_out * stride_h - pad_h;
const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
const float offset_h = data_offset_ptr[data_offset_h_ptr];
const float offset_w = data_offset_ptr[data_offset_w_ptr];
const float mask = data_mask_ptr[data_mask_hw_ptr];
const float cur_inv_h_data = h_in + i * dilation_h + offset_h;
const float cur_inv_w_data = w_in + j * dilation_w + offset_w;
const float cur_top_grad = data_col[index] * mask;
const int cur_h = (int)cur_inv_h_data;
const int cur_w = (int)cur_inv_w_data;
for (int dy = -2; dy <= 2; dy++)
{
for (int dx = -2; dx <= 2; dx++)
{
if (cur_h + dy >= 0 && cur_h + dy < height &&
cur_w + dx >= 0 && cur_w + dx < width &&
abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
abs(cur_inv_w_data - (cur_w + dx)) < 1)
{
int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
float weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
}
}
}
}
}
__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n,
const float *data_col, const float *data_im,
const float *data_offset, const float *data_mask,
const int channels, const int height, const int width,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int offset_channels, const int deformable_group,
const int height_col, const int width_col,
float *grad_offset, float *grad_mask)
{
CUDA_KERNEL_LOOP(index, n)
{
float val = 0, mval = 0;
int w = index % width_col;
int h = (index / width_col) % height_col;
int c = (index / width_col / height_col) % offset_channels;
int b = (index / width_col / height_col) / offset_channels;
// compute the start and end of the output
const int deformable_group_index = c / (2 * kernel_h * kernel_w);
const int col_step = kernel_h * kernel_w;
int cnt = 0;
const float *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col;
const float *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;
const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
{
const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
const int bp_dir = offset_c % 2;
int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
int w_out = col_pos % width_col;
int h_out = (col_pos / width_col) % height_col;
int w_in = w_out * stride_w - pad_w;
int h_in = h_out * stride_h - pad_h;
const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
const float offset_h = data_offset_ptr[data_offset_h_ptr];
const float offset_w = data_offset_ptr[data_offset_w_ptr];
const float mask = data_mask_ptr[data_mask_hw_ptr];
float inv_h = h_in + i * dilation_h + offset_h;
float inv_w = w_in + j * dilation_w + offset_w;
if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
{
inv_h = inv_w = -2;
}
else
{
mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);
}
const float weight = dmcn_get_coordinate_weight(
inv_h, inv_w,
height, width, data_im_ptr + cnt * height * width, width, bp_dir);
val += weight * data_col_ptr[col_pos] * mask;
cnt += 1;
}
// KERNEL_ASSIGN(grad_offset[index], offset_req, val);
grad_offset[index] = val;
if (offset_c % 2 == 0)
// KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);
grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;
}
}
void modulated_deformable_im2col_cuda(cudaStream_t stream,
const float* data_im, const float* data_offset, const float* data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, float* data_col) {
// num_axes should be smaller than block size
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * batch_size * height_col * width_col;
modulated_deformable_im2col_gpu_kernel
<<>>(
num_kernels, data_im, data_offset, data_mask, height_im, width_im, kernel_h, kenerl_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
batch_size, channels, deformable_group, height_col, width_col, data_col);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
}
}
void modulated_deformable_col2im_cuda(cudaStream_t stream,
const float* data_col, const float* data_offset, const float* data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, float* grad_im){
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
modulated_deformable_col2im_gpu_kernel
<<>>(
num_kernels, data_col, data_offset, data_mask, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_h, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
batch_size, deformable_group, height_col, width_col, grad_im);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
}
}
void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
const float* data_col, const float* data_im, const float* data_offset, const float* data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group,
float* grad_offset, float* grad_mask) {
const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
modulated_deformable_col2im_coord_gpu_kernel
<<>>(
num_kernels, data_col, data_im, data_offset, data_mask, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
grad_offset, grad_mask);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
}
}
================================================
FILE: src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda.h
================================================
/*!
******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
*
* COPYRIGHT
*
* All contributions by the University of California:
* Copyright (c) 2014-2017 The Regents of the University of California (Regents)
* All rights reserved.
*
* All other contributions:
* Copyright (c) 2014-2017, the respective contributors
* All rights reserved.
*
* Caffe uses a shared copyright model: each contributor holds copyright over
* their contributions to Caffe. The project versioning records all such
* contribution and copyright details. If a contributor wants to further mark
* their specific copyright on a particular contribution, they should indicate
* their copyright solely in the commit message of the change when it is
* committed.
*
* LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* CONTRIBUTION AGREEMENT
*
* By contributing to the BVLC/caffe repository through pull-request, comment,
* or otherwise, the contributor releases their content to the
* license and copyright terms herein.
*
***************** END Caffe Copyright Notice and Disclaimer ********************
*
* Copyright (c) 2018 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file modulated_deformable_im2col.h
* \brief Function definitions of converting an image to
* column matrix based on kernel, padding, dilation, and offset.
* These functions are mainly used in deformable convolution operators.
* \ref: https://arxiv.org/abs/1811.11168
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
*/
/***************** Adapted by Charles Shang *********************/
#ifndef DCN_V2_IM2COL_CUDA
#define DCN_V2_IM2COL_CUDA
#ifdef __cplusplus
extern "C"
{
#endif
void modulated_deformable_im2col_cuda(cudaStream_t stream,
const float *data_im, const float *data_offset, const float *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, float *data_col);
void modulated_deformable_col2im_cuda(cudaStream_t stream,
const float *data_col, const float *data_offset, const float *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, float *grad_im);
void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group,
float *grad_offset, float *grad_mask);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda_double.cu
================================================
#include "dcn_v2_im2col_cuda_double.h"
#include
#include
#include
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * gridDim.x)
const int CUDA_NUM_THREADS = 512;
inline int GET_BLOCKS(const int N)
{
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
#else
__device__ double atomicAdd(double* address, double val)
{
unsigned long long int* address_as_ull = (unsigned long long int*)address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
#endif
__device__ double dmcn_im2col_bilinear(const double *bottom_data, const int data_width,
const int height, const int width, double h, double w)
{
int h_low = floor(h);
int w_low = floor(w);
int h_high = h_low + 1;
int w_high = w_low + 1;
double lh = h - h_low;
double lw = w - w_low;
double hh = 1 - lh, hw = 1 - lw;
double v1 = 0;
if (h_low >= 0 && w_low >= 0)
v1 = bottom_data[h_low * data_width + w_low];
double v2 = 0;
if (h_low >= 0 && w_high <= width - 1)
v2 = bottom_data[h_low * data_width + w_high];
double v3 = 0;
if (h_high <= height - 1 && w_low >= 0)
v3 = bottom_data[h_high * data_width + w_low];
double v4 = 0;
if (h_high <= height - 1 && w_high <= width - 1)
v4 = bottom_data[h_high * data_width + w_high];
double w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
double val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
return val;
}
__device__ double dmcn_get_gradient_weight(double argmax_h, double argmax_w,
const int h, const int w, const int height, const int width)
{
if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
{
//empty
return 0;
}
int argmax_h_low = floor(argmax_h);
int argmax_w_low = floor(argmax_w);
int argmax_h_high = argmax_h_low + 1;
int argmax_w_high = argmax_w_low + 1;
double weight = 0;
if (h == argmax_h_low && w == argmax_w_low)
weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
if (h == argmax_h_low && w == argmax_w_high)
weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
if (h == argmax_h_high && w == argmax_w_low)
weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
if (h == argmax_h_high && w == argmax_w_high)
weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
return weight;
}
__device__ double dmcn_get_coordinate_weight(double argmax_h, double argmax_w,
const int height, const int width, const double *im_data,
const int data_width, const int bp_dir)
{
if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
{
//empty
return 0;
}
int argmax_h_low = floor(argmax_h);
int argmax_w_low = floor(argmax_w);
int argmax_h_high = argmax_h_low + 1;
int argmax_w_high = argmax_w_low + 1;
double weight = 0;
if (bp_dir == 0)
{
if (argmax_h_low >= 0 && argmax_w_low >= 0)
weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
}
else if (bp_dir == 1)
{
if (argmax_h_low >= 0 && argmax_w_low >= 0)
weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
}
return weight;
}
__global__ void modulated_deformable_im2col_gpu_kernel(const int n,
const double *data_im, const double *data_offset, const double *data_mask,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int num_channels, const int deformable_group,
const int height_col, const int width_col,
double *data_col)
{
CUDA_KERNEL_LOOP(index, n)
{
// index index of output matrix
const int w_col = index % width_col;
const int h_col = (index / width_col) % height_col;
const int b_col = (index / width_col / height_col) % batch_size;
const int c_im = (index / width_col / height_col) / batch_size;
const int c_col = c_im * kernel_h * kernel_w;
// compute deformable group index
const int deformable_group_index = c_im / channel_per_deformable_group;
const int h_in = h_col * stride_h - pad_h;
const int w_in = w_col * stride_w - pad_w;
double *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
//const double* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
const double *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
const double *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const double *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
for (int i = 0; i < kernel_h; ++i)
{
for (int j = 0; j < kernel_w; ++j)
{
const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
const double offset_h = data_offset_ptr[data_offset_h_ptr];
const double offset_w = data_offset_ptr[data_offset_w_ptr];
const double mask = data_mask_ptr[data_mask_hw_ptr];
double val = static_cast(0);
const double h_im = h_in + i * dilation_h + offset_h;
const double w_im = w_in + j * dilation_w + offset_w;
//if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
{
//const double map_h = i * dilation_h + offset_h;
//const double map_w = j * dilation_w + offset_w;
//const int cur_height = height - h_in;
//const int cur_width = width - w_in;
//val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
}
*data_col_ptr = val * mask;
data_col_ptr += batch_size * height_col * width_col;
//data_col_ptr += height_col * width_col;
}
}
}
}
__global__ void modulated_deformable_col2im_gpu_kernel(const int n,
const double *data_col, const double *data_offset, const double *data_mask,
const int channels, const int height, const int width,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int deformable_group,
const int height_col, const int width_col,
double *grad_im)
{
CUDA_KERNEL_LOOP(index, n)
{
const int j = (index / width_col / height_col / batch_size) % kernel_w;
const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
// compute the start and end of the output
const int deformable_group_index = c / channel_per_deformable_group;
int w_out = index % width_col;
int h_out = (index / width_col) % height_col;
int b = (index / width_col / height_col) % batch_size;
int w_in = w_out * stride_w - pad_w;
int h_in = h_out * stride_h - pad_h;
const double *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const double *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
const double offset_h = data_offset_ptr[data_offset_h_ptr];
const double offset_w = data_offset_ptr[data_offset_w_ptr];
const double mask = data_mask_ptr[data_mask_hw_ptr];
const double cur_inv_h_data = h_in + i * dilation_h + offset_h;
const double cur_inv_w_data = w_in + j * dilation_w + offset_w;
const double cur_top_grad = data_col[index] * mask;
const int cur_h = (int)cur_inv_h_data;
const int cur_w = (int)cur_inv_w_data;
for (int dy = -2; dy <= 2; dy++)
{
for (int dx = -2; dx <= 2; dx++)
{
if (cur_h + dy >= 0 && cur_h + dy < height &&
cur_w + dx >= 0 && cur_w + dx < width &&
abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
abs(cur_inv_w_data - (cur_w + dx)) < 1)
{
int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
double weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
}
}
}
}
}
__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n,
const double *data_col, const double *data_im,
const double *data_offset, const double *data_mask,
const int channels, const int height, const int width,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int offset_channels, const int deformable_group,
const int height_col, const int width_col,
double *grad_offset, double *grad_mask)
{
CUDA_KERNEL_LOOP(index, n)
{
double val = 0, mval = 0;
int w = index % width_col;
int h = (index / width_col) % height_col;
int c = (index / width_col / height_col) % offset_channels;
int b = (index / width_col / height_col) / offset_channels;
// compute the start and end of the output
const int deformable_group_index = c / (2 * kernel_h * kernel_w);
const int col_step = kernel_h * kernel_w;
int cnt = 0;
const double *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col;
const double *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;
const double *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const double *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
{
const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
const int bp_dir = offset_c % 2;
int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
int w_out = col_pos % width_col;
int h_out = (col_pos / width_col) % height_col;
int w_in = w_out * stride_w - pad_w;
int h_in = h_out * stride_h - pad_h;
const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
const double offset_h = data_offset_ptr[data_offset_h_ptr];
const double offset_w = data_offset_ptr[data_offset_w_ptr];
const double mask = data_mask_ptr[data_mask_hw_ptr];
double inv_h = h_in + i * dilation_h + offset_h;
double inv_w = w_in + j * dilation_w + offset_w;
if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
{
inv_h = inv_w = -2;
}
else
{
mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);
}
const double weight = dmcn_get_coordinate_weight(
inv_h, inv_w,
height, width, data_im_ptr + cnt * height * width, width, bp_dir);
val += weight * data_col_ptr[col_pos] * mask;
cnt += 1;
}
// KERNEL_ASSIGN(grad_offset[index], offset_req, val);
grad_offset[index] = val;
if (offset_c % 2 == 0)
// KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);
grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;
}
}
void modulated_deformable_im2col_cuda(cudaStream_t stream,
const double *data_im, const double *data_offset, const double *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, double *data_col)
{
// num_axes should be smaller than block size
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * batch_size * height_col * width_col;
modulated_deformable_im2col_gpu_kernel<<>>(
num_kernels, data_im, data_offset, data_mask, height_im, width_im, kernel_h, kenerl_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
batch_size, channels, deformable_group, height_col, width_col, data_col);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
}
}
void modulated_deformable_col2im_cuda(cudaStream_t stream,
const double *data_col, const double *data_offset, const double *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, double *grad_im)
{
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
modulated_deformable_col2im_gpu_kernel<<>>(
num_kernels, data_col, data_offset, data_mask, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_h, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
batch_size, deformable_group, height_col, width_col, grad_im);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
}
}
void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
const double *data_col, const double *data_im, const double *data_offset, const double *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group,
double *grad_offset, double *grad_mask)
{
const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
modulated_deformable_col2im_coord_gpu_kernel<<>>(
num_kernels, data_col, data_im, data_offset, data_mask, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
grad_offset, grad_mask);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
}
}
================================================
FILE: src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda_double.h
================================================
/*!
******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
*
* COPYRIGHT
*
* All contributions by the University of California:
* Copyright (c) 2014-2017 The Regents of the University of California (Regents)
* All rights reserved.
*
* All other contributions:
* Copyright (c) 2014-2017, the respective contributors
* All rights reserved.
*
* Caffe uses a shared copyright model: each contributor holds copyright over
* their contributions to Caffe. The project versioning records all such
* contribution and copyright details. If a contributor wants to further mark
* their specific copyright on a particular contribution, they should indicate
* their copyright solely in the commit message of the change when it is
* committed.
*
* LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* CONTRIBUTION AGREEMENT
*
* By contributing to the BVLC/caffe repository through pull-request, comment,
* or otherwise, the contributor releases their content to the
* license and copyright terms herein.
*
***************** END Caffe Copyright Notice and Disclaimer ********************
*
* Copyright (c) 2018 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file modulated_deformable_im2col.h
* \brief Function definitions of converting an image to
* column matrix based on kernel, padding, dilation, and offset.
* These functions are mainly used in deformable convolution operators.
* \ref: https://arxiv.org/abs/1811.11168
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
*/
/***************** Adapted by Charles Shang *********************/
#ifndef DCN_V2_IM2COL_CUDA_DOUBLE
#define DCN_V2_IM2COL_CUDA_DOUBLE
#ifdef __cplusplus
extern "C"
{
#endif
void modulated_deformable_im2col_cuda(cudaStream_t stream,
const double *data_im, const double *data_offset, const double *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, double *data_col);
void modulated_deformable_col2im_cuda(cudaStream_t stream,
const double *data_col, const double *data_offset, const double *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, double *grad_im);
void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
const double *data_col, const double *data_im, const double *data_offset, const double *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group,
double *grad_offset, double *grad_mask);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda.cu
================================================
/*!
* Copyright (c) 2017 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file deformable_psroi_pooling.cu
* \brief
* \author Yi Li, Guodong Zhang, Jifeng Dai
*/
/***************** Adapted by Charles Shang *********************/
#include "dcn_v2_psroi_pooling_cuda.h"
#include
#include
#include
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * gridDim.x)
const int CUDA_NUM_THREADS = 1024;
inline int GET_BLOCKS(const int N)
{
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
__device__ float bilinear_interp(
const float *data,
const float x,
const float y,
const int width,
const int height)
{
int x1 = floor(x);
int x2 = ceil(x);
int y1 = floor(y);
int y2 = ceil(y);
float dist_x = (float)(x - x1);
float dist_y = (float)(y - y1);
float value11 = data[y1 * width + x1];
float value12 = data[y2 * width + x1];
float value21 = data[y1 * width + x2];
float value22 = data[y2 * width + x2];
float value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12 + dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22;
return value;
}
__global__ void DeformablePSROIPoolForwardKernel(
const int count,
const float *bottom_data,
const float spatial_scale,
const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
const float *bottom_rois, const float *bottom_trans,
const int no_trans,
const float trans_std,
const int sample_per_part,
const int output_dim,
const int group_size,
const int part_size,
const int num_classes,
const int channels_each_class,
float *top_data,
float *top_count)
{
CUDA_KERNEL_LOOP(index, count)
{
// The output is in order (n, ctop, ph, pw)
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int ctop = (index / pooled_width / pooled_height) % output_dim;
int n = index / pooled_width / pooled_height / output_dim;
// [start, end) interval for spatial sampling
const float *offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
float roi_start_w = (float)(round(offset_bottom_rois[1])) * spatial_scale - 0.5;
float roi_start_h = (float)(round(offset_bottom_rois[2])) * spatial_scale - 0.5;
float roi_end_w = (float)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;
float roi_end_h = (float)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;
// Force too small ROIs to be 1x1
float roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
float roi_height = max(roi_end_h - roi_start_h, 0.1);
// Compute w and h at bottom
float bin_size_h = roi_height / (float)(pooled_height);
float bin_size_w = roi_width / (float)(pooled_width);
float sub_bin_size_h = bin_size_h / (float)(sample_per_part);
float sub_bin_size_w = bin_size_w / (float)(sample_per_part);
int part_h = floor((float)(ph) / pooled_height * part_size);
int part_w = floor((float)(pw) / pooled_width * part_size);
int class_id = ctop / channels_each_class;
float trans_x = no_trans ? (float)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;
float trans_y = no_trans ? (float)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;
float wstart = (float)(pw)*bin_size_w + roi_start_w;
wstart += trans_x * roi_width;
float hstart = (float)(ph)*bin_size_h + roi_start_h;
hstart += trans_y * roi_height;
float sum = 0;
int count = 0;
int gw = floor((float)(pw)*group_size / pooled_width);
int gh = floor((float)(ph)*group_size / pooled_height);
gw = min(max(gw, 0), group_size - 1);
gh = min(max(gh, 0), group_size - 1);
const float *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width;
for (int ih = 0; ih < sample_per_part; ih++)
{
for (int iw = 0; iw < sample_per_part; iw++)
{
float w = wstart + iw * sub_bin_size_w;
float h = hstart + ih * sub_bin_size_h;
// bilinear interpolation
if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)
{
continue;
}
w = min(max(w, 0.), width - 1.);
h = min(max(h, 0.), height - 1.);
int c = (ctop * group_size + gh) * group_size + gw;
float val = bilinear_interp(offset_bottom_data + c * height * width, w, h, width, height);
sum += val;
count++;
}
}
top_data[index] = count == 0 ? (float)(0) : sum / count;
top_count[index] = count;
}
}
__global__ void DeformablePSROIPoolBackwardAccKernel(
const int count,
const float *top_diff,
const float *top_count,
const int num_rois,
const float spatial_scale,
const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
const int output_dim,
float *bottom_data_diff, float *bottom_trans_diff,
const float *bottom_data,
const float *bottom_rois,
const float *bottom_trans,
const int no_trans,
const float trans_std,
const int sample_per_part,
const int group_size,
const int part_size,
const int num_classes,
const int channels_each_class)
{
CUDA_KERNEL_LOOP(index, count)
{
// The output is in order (n, ctop, ph, pw)
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int ctop = (index / pooled_width / pooled_height) % output_dim;
int n = index / pooled_width / pooled_height / output_dim;
// [start, end) interval for spatial sampling
const float *offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
float roi_start_w = (float)(round(offset_bottom_rois[1])) * spatial_scale - 0.5;
float roi_start_h = (float)(round(offset_bottom_rois[2])) * spatial_scale - 0.5;
float roi_end_w = (float)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;
float roi_end_h = (float)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;
// Force too small ROIs to be 1x1
float roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
float roi_height = max(roi_end_h - roi_start_h, 0.1);
// Compute w and h at bottom
float bin_size_h = roi_height / (float)(pooled_height);
float bin_size_w = roi_width / (float)(pooled_width);
float sub_bin_size_h = bin_size_h / (float)(sample_per_part);
float sub_bin_size_w = bin_size_w / (float)(sample_per_part);
int part_h = floor((float)(ph) / pooled_height * part_size);
int part_w = floor((float)(pw) / pooled_width * part_size);
int class_id = ctop / channels_each_class;
float trans_x = no_trans ? (float)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;
float trans_y = no_trans ? (float)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;
float wstart = (float)(pw)*bin_size_w + roi_start_w;
wstart += trans_x * roi_width;
float hstart = (float)(ph)*bin_size_h + roi_start_h;
hstart += trans_y * roi_height;
if (top_count[index] <= 0)
{
continue;
}
float diff_val = top_diff[index] / top_count[index];
const float *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width;
float *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width;
int gw = floor((float)(pw)*group_size / pooled_width);
int gh = floor((float)(ph)*group_size / pooled_height);
gw = min(max(gw, 0), group_size - 1);
gh = min(max(gh, 0), group_size - 1);
for (int ih = 0; ih < sample_per_part; ih++)
{
for (int iw = 0; iw < sample_per_part; iw++)
{
float w = wstart + iw * sub_bin_size_w;
float h = hstart + ih * sub_bin_size_h;
// bilinear interpolation
if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)
{
continue;
}
w = min(max(w, 0.), width - 1.);
h = min(max(h, 0.), height - 1.);
int c = (ctop * group_size + gh) * group_size + gw;
// backward on feature
int x0 = floor(w);
int x1 = ceil(w);
int y0 = floor(h);
int y1 = ceil(h);
float dist_x = w - x0, dist_y = h - y0;
float q00 = (1 - dist_x) * (1 - dist_y);
float q01 = (1 - dist_x) * dist_y;
float q10 = dist_x * (1 - dist_y);
float q11 = dist_x * dist_y;
int bottom_index_base = c * height * width;
atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val);
if (no_trans)
{
continue;
}
float U00 = offset_bottom_data[bottom_index_base + y0 * width + x0];
float U01 = offset_bottom_data[bottom_index_base + y1 * width + x0];
float U10 = offset_bottom_data[bottom_index_base + y0 * width + x1];
float U11 = offset_bottom_data[bottom_index_base + y1 * width + x1];
float diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val;
diff_x *= roi_width;
float diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val;
diff_y *= roi_height;
atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x);
atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y);
}
}
}
}
void DeformablePSROIPoolForward(cudaStream_t stream,
const float *data,
const float *bbox,
const float *trans,
float *out,
float *top_count,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
const float *bottom_data = data;
const float *bottom_rois = bbox;
const float *bottom_trans = no_trans ? NULL : trans;
float *top_data = out;
float *top_count_data = top_count;
const int pooled_height = pooled_size;
const int pooled_width = pooled_size;
const int count = num_bbox * output_dim * pooled_height * pooled_width;
const int num_classes = no_trans ? 1 : channels_trans / 2;
const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;
DeformablePSROIPoolForwardKernel<<>>(
count, bottom_data, spatial_scale, channels, height, width, pooled_height, pooled_width,
bottom_rois, bottom_trans, no_trans, trans_std, sample_per_part, output_dim,
group_size, part_size, num_classes, channels_each_class, top_data, top_count_data);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
}
}
void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
const float *out_grad,
const float *data,
const float *bbox,
const float *trans,
const float *top_count,
float *in_grad,
float *trans_grad,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
// LOG(INFO) << "DeformablePSROIPoolBackward";
const float *top_diff = out_grad;
const float *bottom_data = data;
const float *bottom_rois = bbox;
const float *bottom_trans = no_trans ? NULL : trans;
float *bottom_data_diff = in_grad;
float *bottom_trans_diff = no_trans ? NULL : trans_grad;
const float *top_count_data = top_count;
const int num_rois = num_bbox;
const int pooled_height = pooled_size;
const int pooled_width = pooled_size;
const int count = num_bbox * output_dim * pooled_height * pooled_width;
const int num_classes = no_trans ? 1 : channels_trans / 2;
const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;
DeformablePSROIPoolBackwardAccKernel<<>>(
count, top_diff, top_count_data, num_rois, spatial_scale, channels, height, width,
pooled_height, pooled_width, output_dim, bottom_data_diff, bottom_trans_diff,
bottom_data, bottom_rois, bottom_trans, no_trans, trans_std, sample_per_part,
group_size, part_size, num_classes, channels_each_class);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
}
}
================================================
FILE: src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda.h
================================================
/*!
* Copyright (c) 2017 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file deformable_psroi_pooling.cu
* \brief
* \author Yi Li, Guodong Zhang, Jifeng Dai
*/
/***************** Adapted by Charles Shang *********************/
#ifndef DCN_V2_PSROI_POOLING_CUDA
#define DCN_V2_PSROI_POOLING_CUDA
#ifdef __cplusplus
extern "C"
{
#endif
void DeformablePSROIPoolForward(cudaStream_t stream,
const float *data,
const float *bbox,
const float *trans,
float *out,
float *top_count,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
const float *out_grad,
const float *data,
const float *bbox,
const float *trans,
const float *top_count,
float *in_grad,
float *trans_grad,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda_double.cu
================================================
/*!
* Copyright (c) 2017 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file deformable_psroi_pooling.cu
* \brief
* \author Yi Li, Guodong Zhang, Jifeng Dai
*/
/***************** Adapted by Charles Shang *********************/
#include "dcn_v2_psroi_pooling_cuda_double.h"
#include
#include
#include
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * gridDim.x)
const int CUDA_NUM_THREADS = 1024;
inline int GET_BLOCKS(const int N)
{
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
#else
__device__ double atomicAdd(double* address, double val)
{
unsigned long long int* address_as_ull = (unsigned long long int*)address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
#endif
__device__ double bilinear_interp(
const double *data,
const double x,
const double y,
const int width,
const int height)
{
int x1 = floor(x);
int x2 = ceil(x);
int y1 = floor(y);
int y2 = ceil(y);
double dist_x = (double)(x - x1);
double dist_y = (double)(y - y1);
double value11 = data[y1 * width + x1];
double value12 = data[y2 * width + x1];
double value21 = data[y1 * width + x2];
double value22 = data[y2 * width + x2];
double value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12 + dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22;
return value;
}
__global__ void DeformablePSROIPoolForwardKernel(
const int count,
const double *bottom_data,
const double spatial_scale,
const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
const double *bottom_rois, const double *bottom_trans,
const int no_trans,
const double trans_std,
const int sample_per_part,
const int output_dim,
const int group_size,
const int part_size,
const int num_classes,
const int channels_each_class,
double *top_data,
double *top_count)
{
CUDA_KERNEL_LOOP(index, count)
{
// The output is in order (n, ctop, ph, pw)
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int ctop = (index / pooled_width / pooled_height) % output_dim;
int n = index / pooled_width / pooled_height / output_dim;
// [start, end) interval for spatial sampling
const double *offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
double roi_start_w = (double)(round(offset_bottom_rois[1])) * spatial_scale - 0.5;
double roi_start_h = (double)(round(offset_bottom_rois[2])) * spatial_scale - 0.5;
double roi_end_w = (double)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;
double roi_end_h = (double)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;
// Force too small ROIs to be 1x1
double roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
double roi_height = max(roi_end_h - roi_start_h, 0.1);
// Compute w and h at bottom
double bin_size_h = roi_height / (double)(pooled_height);
double bin_size_w = roi_width / (double)(pooled_width);
double sub_bin_size_h = bin_size_h / (double)(sample_per_part);
double sub_bin_size_w = bin_size_w / (double)(sample_per_part);
int part_h = floor((double)(ph) / pooled_height * part_size);
int part_w = floor((double)(pw) / pooled_width * part_size);
int class_id = ctop / channels_each_class;
double trans_x = no_trans ? (double)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;
double trans_y = no_trans ? (double)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;
double wstart = (double)(pw)*bin_size_w + roi_start_w;
wstart += trans_x * roi_width;
double hstart = (double)(ph)*bin_size_h + roi_start_h;
hstart += trans_y * roi_height;
double sum = 0;
int count = 0;
int gw = floor((double)(pw)*group_size / pooled_width);
int gh = floor((double)(ph)*group_size / pooled_height);
gw = min(max(gw, 0), group_size - 1);
gh = min(max(gh, 0), group_size - 1);
const double *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width;
for (int ih = 0; ih < sample_per_part; ih++)
{
for (int iw = 0; iw < sample_per_part; iw++)
{
double w = wstart + iw * sub_bin_size_w;
double h = hstart + ih * sub_bin_size_h;
// bilinear interpolation
if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)
{
continue;
}
w = min(max(w, 0.), width - 1.);
h = min(max(h, 0.), height - 1.);
int c = (ctop * group_size + gh) * group_size + gw;
double val = bilinear_interp(offset_bottom_data + c * height * width, w, h, width, height);
sum += val;
count++;
}
}
top_data[index] = count == 0 ? (double)(0) : sum / count;
top_count[index] = count;
}
}
__global__ void DeformablePSROIPoolBackwardAccKernel(
const int count,
const double *top_diff,
const double *top_count,
const int num_rois,
const double spatial_scale,
const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
const int output_dim,
double *bottom_data_diff, double *bottom_trans_diff,
const double *bottom_data,
const double *bottom_rois,
const double *bottom_trans,
const int no_trans,
const double trans_std,
const int sample_per_part,
const int group_size,
const int part_size,
const int num_classes,
const int channels_each_class)
{
CUDA_KERNEL_LOOP(index, count)
{
// The output is in order (n, ctop, ph, pw)
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int ctop = (index / pooled_width / pooled_height) % output_dim;
int n = index / pooled_width / pooled_height / output_dim;
// [start, end) interval for spatial sampling
const double *offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
double roi_start_w = (double)(round(offset_bottom_rois[1])) * spatial_scale - 0.5;
double roi_start_h = (double)(round(offset_bottom_rois[2])) * spatial_scale - 0.5;
double roi_end_w = (double)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;
double roi_end_h = (double)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;
// Force too small ROIs to be 1x1
double roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
double roi_height = max(roi_end_h - roi_start_h, 0.1);
// Compute w and h at bottom
double bin_size_h = roi_height / (double)(pooled_height);
double bin_size_w = roi_width / (double)(pooled_width);
double sub_bin_size_h = bin_size_h / (double)(sample_per_part);
double sub_bin_size_w = bin_size_w / (double)(sample_per_part);
int part_h = floor((double)(ph) / pooled_height * part_size);
int part_w = floor((double)(pw) / pooled_width * part_size);
int class_id = ctop / channels_each_class;
double trans_x = no_trans ? (double)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;
double trans_y = no_trans ? (double)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;
double wstart = (double)(pw)*bin_size_w + roi_start_w;
wstart += trans_x * roi_width;
double hstart = (double)(ph)*bin_size_h + roi_start_h;
hstart += trans_y * roi_height;
if (top_count[index] <= 0)
{
continue;
}
double diff_val = top_diff[index] / top_count[index];
const double *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width;
double *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width;
int gw = floor((double)(pw)*group_size / pooled_width);
int gh = floor((double)(ph)*group_size / pooled_height);
gw = min(max(gw, 0), group_size - 1);
gh = min(max(gh, 0), group_size - 1);
for (int ih = 0; ih < sample_per_part; ih++)
{
for (int iw = 0; iw < sample_per_part; iw++)
{
double w = wstart + iw * sub_bin_size_w;
double h = hstart + ih * sub_bin_size_h;
// bilinear interpolation
if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)
{
continue;
}
w = min(max(w, 0.), width - 1.);
h = min(max(h, 0.), height - 1.);
int c = (ctop * group_size + gh) * group_size + gw;
// backward on feature
int x0 = floor(w);
int x1 = ceil(w);
int y0 = floor(h);
int y1 = ceil(h);
double dist_x = w - x0, dist_y = h - y0;
double q00 = (1 - dist_x) * (1 - dist_y);
double q01 = (1 - dist_x) * dist_y;
double q10 = dist_x * (1 - dist_y);
double q11 = dist_x * dist_y;
int bottom_index_base = c * height * width;
atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val);
if (no_trans)
{
continue;
}
double U00 = offset_bottom_data[bottom_index_base + y0 * width + x0];
double U01 = offset_bottom_data[bottom_index_base + y1 * width + x0];
double U10 = offset_bottom_data[bottom_index_base + y0 * width + x1];
double U11 = offset_bottom_data[bottom_index_base + y1 * width + x1];
double diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val;
diff_x *= roi_width;
double diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val;
diff_y *= roi_height;
atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x);
atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y);
}
}
}
}
void DeformablePSROIPoolForward(cudaStream_t stream,
const double *data,
const double *bbox,
const double *trans,
double *out,
double *top_count,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const double spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const double trans_std)
{
const double *bottom_data = data;
const double *bottom_rois = bbox;
const double *bottom_trans = no_trans ? NULL : trans;
double *top_data = out;
double *top_count_data = top_count;
const int pooled_height = pooled_size;
const int pooled_width = pooled_size;
const int count = num_bbox * output_dim * pooled_height * pooled_width;
const int num_classes = no_trans ? 1 : channels_trans / 2;
const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;
DeformablePSROIPoolForwardKernel<<>>(
count, bottom_data, spatial_scale, channels, height, width, pooled_height, pooled_width,
bottom_rois, bottom_trans, no_trans, trans_std, sample_per_part, output_dim,
group_size, part_size, num_classes, channels_each_class, top_data, top_count_data);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
}
}
void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
const double *out_grad,
const double *data,
const double *bbox,
const double *trans,
const double *top_count,
double *in_grad,
double *trans_grad,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const double spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const double trans_std)
{
// LOG(INFO) << "DeformablePSROIPoolBackward";
const double *top_diff = out_grad;
const double *bottom_data = data;
const double *bottom_rois = bbox;
const double *bottom_trans = no_trans ? NULL : trans;
double *bottom_data_diff = in_grad;
double *bottom_trans_diff = no_trans ? NULL : trans_grad;
const double *top_count_data = top_count;
const int num_rois = num_bbox;
const int pooled_height = pooled_size;
const int pooled_width = pooled_size;
const int count = num_bbox * output_dim * pooled_height * pooled_width;
const int num_classes = no_trans ? 1 : channels_trans / 2;
const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;
DeformablePSROIPoolBackwardAccKernel<<>>(
count, top_diff, top_count_data, num_rois, spatial_scale, channels, height, width,
pooled_height, pooled_width, output_dim, bottom_data_diff, bottom_trans_diff,
bottom_data, bottom_rois, bottom_trans, no_trans, trans_std, sample_per_part,
group_size, part_size, num_classes, channels_each_class);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
}
}
================================================
FILE: src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda_double.h
================================================
/*!
* Copyright (c) 2017 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file deformable_psroi_pooling.cu
* \brief
* \author Yi Li, Guodong Zhang, Jifeng Dai
*/
/***************** Adapted by Charles Shang *********************/
#ifndef DCN_V2_PSROI_POOLING_CUDA_DOUBLE
#define DCN_V2_PSROI_POOLING_CUDA_DOUBLE
#ifdef __cplusplus
extern "C"
{
#endif
void DeformablePSROIPoolForward(cudaStream_t stream,
const double *data,
const double *bbox,
const double *trans,
double *out,
double *top_count,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const double spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const double trans_std);
void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
const double *out_grad,
const double *data,
const double *bbox,
const double *trans,
const double *top_count,
double *in_grad,
double *trans_grad,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const double spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const double trans_std);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: src/lib/models/networks/DCNv2/src/dcn_v2.c
================================================
#include
#include
#include
void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight,
THFloatTensor *bias, THFloatTensor *ones,
THFloatTensor *offset, THFloatTensor *mask,
THFloatTensor *output, THFloatTensor *columns,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group)
{
printf("only implemented in GPU");
}
void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight,
THFloatTensor *bias, THFloatTensor *ones,
THFloatTensor *offset, THFloatTensor *mask,
THFloatTensor *output, THFloatTensor *columns,
THFloatTensor *grad_input, THFloatTensor *grad_weight,
THFloatTensor *grad_bias, THFloatTensor *grad_offset,
THFloatTensor *grad_mask, THFloatTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group)
{
printf("only implemented in GPU");
}
================================================
FILE: src/lib/models/networks/DCNv2/src/dcn_v2.h
================================================
void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight,
THFloatTensor *bias, THFloatTensor *ones,
THFloatTensor *offset, THFloatTensor *mask,
THFloatTensor *output, THFloatTensor *columns,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group);
void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight,
THFloatTensor *bias, THFloatTensor *ones,
THFloatTensor *offset, THFloatTensor *mask,
THFloatTensor *output, THFloatTensor *columns,
THFloatTensor *grad_input, THFloatTensor *grad_weight,
THFloatTensor *grad_bias, THFloatTensor *grad_offset,
THFloatTensor *grad_mask, THFloatTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group);
================================================
FILE: src/lib/models/networks/DCNv2/src/dcn_v2_cuda.c
================================================
#include
#include "cuda/dcn_v2_im2col_cuda.h"
#include "cuda/dcn_v2_psroi_pooling_cuda.h"
extern THCState *state;
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
void dcn_v2_cuda_forward(THCudaTensor *input, THCudaTensor *weight,
THCudaTensor *bias, THCudaTensor *ones,
THCudaTensor *offset, THCudaTensor *mask,
THCudaTensor *output, THCudaTensor *columns,
int kernel_h, int kernel_w,
const int stride_h, const int stride_w,
const int pad_h, const int pad_w,
const int dilation_h, const int dilation_w,
const int deformable_group)
{
THCAssertSameGPU(THCudaTensor_checkGPU(state, 8, input, weight, bias, ones, offset, mask, output, columns));
THArgCheck(THCudaTensor_isContiguous(state, input), 1, "input tensor has to be contiguous");
THArgCheck(THCudaTensor_isContiguous(state, weight), 2, "weight tensor has to be contiguous");
const int batch = THCudaTensor_size(state, input, 0);
const int channels = THCudaTensor_size(state, input, 1);
const int height = THCudaTensor_size(state, input, 2);
const int width = THCudaTensor_size(state, input, 3);
const int channels_out = THCudaTensor_size(state, weight, 0);
const int channels_kernel = THCudaTensor_size(state, weight, 1);
const int kernel_h_ = THCudaTensor_size(state, weight, 2);
const int kernel_w_ = THCudaTensor_size(state, weight, 3);
if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
THError("Input shape and kernel shape wont match: (%d x %d vs %d x %d).",
kernel_h_, kernel_w, kernel_h_, kernel_w_);
if (channels != channels_kernel)
THError("Input shape and kernel channels wont match: (%d vs %d).",
channels, channels_kernel);
const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
if (THCudaTensor_nDimension(state, ones) != 2 ||
THCudaTensor_size(state, ones, 0) * THCudaTensor_size(state, ones, 1) < height_out * width_out)
{
// Resize plane and fill with ones...
THCudaTensor_resize2d(state, ones, height_out, width_out);
THCudaTensor_fill(state, ones, 1);
}
// resize output
THCudaTensor_resize4d(state, output, batch, channels_out, height_out, width_out);
// resize temporary columns
THCudaTensor_resize2d(state, columns, channels * kernel_h * kernel_w, 1 * height_out * width_out);
THCudaTensor *input_n = THCudaTensor_new(state);
THCudaTensor *offset_n = THCudaTensor_new(state);
THCudaTensor *mask_n = THCudaTensor_new(state);
THCudaTensor *output_n = THCudaTensor_new(state);
for (int b = 0; b < batch; b++)
{
THCudaTensor_select(state, input_n, input, 0, b);
THCudaTensor_select(state, offset_n, offset, 0, b);
THCudaTensor_select(state, mask_n, mask, 0, b);
THCudaTensor_select(state, output_n, output, 0, b);
// Do Bias first:
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
// (N x 1) (1 x M)
long m_ = channels_out;
long n_ = height_out * width_out;
long k_ = 1;
THCudaBlas_Sgemm(state, 't', 'n', n_, m_, k_, 1.0f,
THCudaTensor_data(state, ones), k_,
THCudaTensor_data(state, bias), k_, 0.0f,
THCudaTensor_data(state, output_n), n_);
modulated_deformable_im2col_cuda(THCState_getCurrentStream(state),
THCudaTensor_data(state, input_n), THCudaTensor_data(state, offset_n),
THCudaTensor_data(state, mask_n),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
deformable_group, THCudaTensor_data(state, columns));
//(k * m) x (m * n)
// Y = WC
long m = channels_out;
long n = height_out * width_out;
long k = channels * kernel_h * kernel_w;
THCudaBlas_Sgemm(state, 'n', 'n', n, m, k, 1.0f,
THCudaTensor_data(state, columns), n,
THCudaTensor_data(state, weight), k, 1.0f,
THCudaTensor_data(state, output_n), n);
}
THCudaTensor_free(state, input_n);
THCudaTensor_free(state, offset_n);
THCudaTensor_free(state, mask_n);
THCudaTensor_free(state, output_n);
}
void dcn_v2_cuda_backward(THCudaTensor *input, THCudaTensor *weight,
THCudaTensor *bias, THCudaTensor *ones,
THCudaTensor *offset, THCudaTensor *mask,
THCudaTensor *columns,
THCudaTensor *grad_input, THCudaTensor *grad_weight,
THCudaTensor *grad_bias, THCudaTensor *grad_offset,
THCudaTensor *grad_mask, THCudaTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group)
{
THCAssertSameGPU(THCudaTensor_checkGPU(state, 13, input, weight, bias, ones, offset, mask, columns,
grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output));
THArgCheck(THCudaTensor_isContiguous(state, input), 1, "input tensor has to be contiguous");
THArgCheck(THCudaTensor_isContiguous(state, weight), 2, "weight tensor has to be contiguous");
const int batch = THCudaTensor_size(state, input, 0);
const int channels = THCudaTensor_size(state, input, 1);
const int height = THCudaTensor_size(state, input, 2);
const int width = THCudaTensor_size(state, input, 3);
const int channels_out = THCudaTensor_size(state, weight, 0);
const int channels_kernel = THCudaTensor_size(state, weight, 1);
const int kernel_h_ = THCudaTensor_size(state, weight, 2);
const int kernel_w_ = THCudaTensor_size(state, weight, 3);
if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
THError("Input shape and kernel shape wont match: (%d x %d vs %d x %d).",
kernel_h_, kernel_w, kernel_h_, kernel_w_);
if (channels != channels_kernel)
THError("Input shape and kernel channels wont match: (%d vs %d).",
channels, channels_kernel);
const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
if (THCudaTensor_nDimension(state, ones) != 2 ||
THCudaTensor_size(state, ones, 0) * THCudaTensor_size(state, ones, 1) < height_out * width_out)
{
// Resize plane and fill with ones...
THCudaTensor_resize2d(state, ones, height_out, width_out);
THCudaTensor_fill(state, ones, 1.0f);
}
THCudaTensor_resize4d(state, grad_input, batch, channels, height, width);
THCudaTensor_resize2d(state, columns, channels * kernel_h * kernel_w, height_out * width_out);
THCudaTensor *input_n = THCudaTensor_new(state);
THCudaTensor *offset_n = THCudaTensor_new(state);
THCudaTensor *mask_n = THCudaTensor_new(state);
THCudaTensor *grad_output_n = THCudaTensor_new(state);
THCudaTensor *grad_input_n = THCudaTensor_new(state);
THCudaTensor *grad_offset_n = THCudaTensor_new(state);
THCudaTensor *grad_mask_n = THCudaTensor_new(state);
for (int b = 0; b < batch; b++)
{
THCudaTensor_select(state, input_n, input, 0, b);
THCudaTensor_select(state, offset_n, offset, 0, b);
THCudaTensor_select(state, mask_n, mask, 0, b);
THCudaTensor_select(state, grad_output_n, grad_output, 0, b);
THCudaTensor_select(state, grad_input_n, grad_input, 0, b);
THCudaTensor_select(state, grad_offset_n, grad_offset, 0, b);
THCudaTensor_select(state, grad_mask_n, grad_mask, 0, b);
long m = channels * kernel_h * kernel_w;
long n = height_out * width_out;
long k = channels_out;
THCudaBlas_Sgemm(state, 'n', 't', n, m, k, 1.0f,
THCudaTensor_data(state, grad_output_n), n,
THCudaTensor_data(state, weight), m, 0.0f,
THCudaTensor_data(state, columns), n);
// gradient w.r.t. input coordinate data
modulated_deformable_col2im_coord_cuda(THCState_getCurrentStream(state),
THCudaTensor_data(state, columns),
THCudaTensor_data(state, input_n),
THCudaTensor_data(state, offset_n),
THCudaTensor_data(state, mask_n),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
THCudaTensor_data(state, grad_offset_n),
THCudaTensor_data(state, grad_mask_n));
// gradient w.r.t. input data
modulated_deformable_col2im_cuda(THCState_getCurrentStream(state),
THCudaTensor_data(state, columns),
THCudaTensor_data(state, offset_n),
THCudaTensor_data(state, mask_n),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
THCudaTensor_data(state, grad_input_n));
// gradient w.r.t. weight, dWeight should accumulate across the batch and group
modulated_deformable_im2col_cuda(THCState_getCurrentStream(state),
THCudaTensor_data(state, input_n),
THCudaTensor_data(state, offset_n),
THCudaTensor_data(state, mask_n),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
THCudaTensor_data(state, columns));
long m_ = channels_out;
long n_ = channels * kernel_h * kernel_w;
long k_ = height_out * width_out;
THCudaBlas_Sgemm(state, 't', 'n', n_, m_, k_, 1.0f,
THCudaTensor_data(state, columns), k_,
THCudaTensor_data(state, grad_output_n), k_, 1.0f,
THCudaTensor_data(state, grad_weight), n_);
// gradient w.r.t. bias
// long m_ = channels_out;
// long k__ = height_out * width_out;
THCudaBlas_Sgemv(state,
't',
k_, m_, 1.0f,
THCudaTensor_data(state, grad_output_n), k_,
THCudaTensor_data(state, ones), 1, 1.0f,
THCudaTensor_data(state, grad_bias), 1);
}
THCudaTensor_free(state, input_n);
THCudaTensor_free(state, offset_n);
THCudaTensor_free(state, mask_n);
THCudaTensor_free(state, grad_output_n);
THCudaTensor_free(state, grad_input_n);
THCudaTensor_free(state, grad_offset_n);
THCudaTensor_free(state, grad_mask_n);
}
void dcn_v2_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox,
THCudaTensor * trans,
THCudaTensor * out, THCudaTensor * top_count,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
THArgCheck(THCudaTensor_isContiguous(state, input), 1, "input tensor has to be contiguous");
THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, bbox, trans, out, top_count));
const int batch = THCudaTensor_size(state, input, 0);
const int channels = THCudaTensor_size(state, input, 1);
const int height = THCudaTensor_size(state, input, 2);
const int width = THCudaTensor_size(state, input, 3);
const int channels_trans = no_trans? 2 : THCudaTensor_size(state, trans, 1);
const int num_bbox = THCudaTensor_size(state, bbox, 0);
if (num_bbox != THCudaTensor_size(state, out, 0))
THError("Output shape and bbox number wont match: (%d vs %d).",
THCudaTensor_size(state, out, 0), num_bbox);
DeformablePSROIPoolForward(THCState_getCurrentStream(state),
THCudaTensor_data(state, input),
THCudaTensor_data(state, bbox),
THCudaTensor_data(state, trans),
THCudaTensor_data(state, out),
THCudaTensor_data(state, top_count),
batch, channels, height, width,
num_bbox,
channels_trans,
no_trans,
spatial_scale,
output_dim,
group_size,
pooled_size,
part_size,
sample_per_part,
trans_std);
}
void dcn_v2_psroi_pooling_cuda_backward(THCudaTensor * out_grad,
THCudaTensor * input, THCudaTensor * bbox,
THCudaTensor * trans, THCudaTensor * top_count,
THCudaTensor * input_grad, THCudaTensor * trans_grad,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
THArgCheck(THCudaTensor_isContiguous(state, out_grad), 0, "out_grad tensor has to be contiguous");
THArgCheck(THCudaTensor_isContiguous(state, input), 1, "input tensor has to be contiguous");
THCAssertSameGPU(THCudaTensor_checkGPU(state, 7, input, bbox, trans, out_grad, top_count,
input_grad, trans_grad));
const int batch = THCudaTensor_size(state, input, 0);
const int channels = THCudaTensor_size(state, input, 1);
const int height = THCudaTensor_size(state, input, 2);
const int width = THCudaTensor_size(state, input, 3);
const int channels_trans = no_trans? 2 : THCudaTensor_size(state, trans, 1);
const int num_bbox = THCudaTensor_size(state, bbox, 0);
if (num_bbox != THCudaTensor_size(state, out_grad, 0))
THError("Output shape and bbox number wont match: (%d vs %d).",
THCudaTensor_size(state, out_grad, 0), num_bbox);
DeformablePSROIPoolBackwardAcc(THCState_getCurrentStream(state),
THCudaTensor_data(state, out_grad),
THCudaTensor_data(state, input),
THCudaTensor_data(state, bbox),
THCudaTensor_data(state, trans),
THCudaTensor_data(state, top_count),
THCudaTensor_data(state, input_grad),
THCudaTensor_data(state, trans_grad),
batch, channels, height, width, num_bbox,
channels_trans,
no_trans,
spatial_scale,
output_dim,
group_size,
pooled_size,
part_size,
sample_per_part,
trans_std);
}
================================================
FILE: src/lib/models/networks/DCNv2/src/dcn_v2_cuda.h
================================================
// #ifndef DCN_V2_CUDA
// #define DCN_V2_CUDA
// #ifdef __cplusplus
// extern "C"
// {
// #endif
void dcn_v2_cuda_forward(THCudaTensor *input, THCudaTensor *weight,
THCudaTensor *bias, THCudaTensor *ones,
THCudaTensor *offset, THCudaTensor *mask,
THCudaTensor *output, THCudaTensor *columns,
int kernel_h, int kernel_w,
const int stride_h, const int stride_w,
const int pad_h, const int pad_w,
const int dilation_h, const int dilation_w,
const int deformable_group);
void dcn_v2_cuda_backward(THCudaTensor *input, THCudaTensor *weight,
THCudaTensor *bias, THCudaTensor *ones,
THCudaTensor *offset, THCudaTensor *mask,
THCudaTensor *columns,
THCudaTensor *grad_input, THCudaTensor *grad_weight,
THCudaTensor *grad_bias, THCudaTensor *grad_offset,
THCudaTensor *grad_mask, THCudaTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group);
void dcn_v2_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox,
THCudaTensor * trans,
THCudaTensor * out, THCudaTensor * top_count,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
void dcn_v2_psroi_pooling_cuda_backward(THCudaTensor * out_grad,
THCudaTensor * input, THCudaTensor * bbox,
THCudaTensor * trans, THCudaTensor * top_count,
THCudaTensor * input_grad, THCudaTensor * trans_grad,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
// #ifdef __cplusplus
// }
// #endif
// #endif
================================================
FILE: src/lib/models/networks/DCNv2/src/dcn_v2_cuda_double.c
================================================
#include
#include "cuda/dcn_v2_im2col_cuda_double.h"
#include "cuda/dcn_v2_psroi_pooling_cuda_double.h"
extern THCState *state;
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
void dcn_v2_cuda_forward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight,
THCudaDoubleTensor *bias, THCudaDoubleTensor *ones,
THCudaDoubleTensor *offset, THCudaDoubleTensor *mask,
THCudaDoubleTensor *output, THCudaDoubleTensor *columns,
int kernel_h, int kernel_w,
const int stride_h, const int stride_w,
const int pad_h, const int pad_w,
const int dilation_h, const int dilation_w,
const int deformable_group)
{
THCAssertSameGPU(THCudaDoubleTensor_checkGPU(state, 8, input, weight, bias, ones, offset, mask, output, columns));
THArgCheck(THCudaDoubleTensor_isContiguous(state, input), 1, "input tensor has to be contiguous");
THArgCheck(THCudaDoubleTensor_isContiguous(state, weight), 2, "weight tensor has to be contiguous");
input = THCudaDoubleTensor_newContiguous(state, input);
offset = THCudaDoubleTensor_newContiguous(state, offset);
mask = THCudaDoubleTensor_newContiguous(state, mask);
weight = THCudaDoubleTensor_newContiguous(state, weight);
const int batch = THCudaDoubleTensor_size(state, input, 0);
const int channels = THCudaDoubleTensor_size(state, input, 1);
const int height = THCudaDoubleTensor_size(state, input, 2);
const int width = THCudaDoubleTensor_size(state, input, 3);
const int channels_out = THCudaDoubleTensor_size(state, weight, 0);
const int channels_kernel = THCudaDoubleTensor_size(state, weight, 1);
const int kernel_h_ = THCudaDoubleTensor_size(state, weight, 2);
const int kernel_w_ = THCudaDoubleTensor_size(state, weight, 3);
if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
THError("Input shape and kernel shape wont match: (%d x %d vs %d x %d).",
kernel_h_, kernel_w, kernel_h_, kernel_w_);
if (channels != channels_kernel)
THError("Input shape and kernel channels wont match: (%d vs %d).",
channels, channels_kernel);
const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
if (THCudaDoubleTensor_nDimension(state, ones) != 2 ||
THCudaDoubleTensor_size(state, ones, 0) * THCudaDoubleTensor_size(state, ones, 1) < height_out * width_out)
{
// Resize plane and fill with ones...
THCudaDoubleTensor_resize2d(state, ones, height_out, width_out);
THCudaDoubleTensor_fill(state, ones, 1);
}
// resize output
THCudaDoubleTensor_resize4d(state, output, batch, channels_out, height_out, width_out);
// resize temporary columns
THCudaDoubleTensor_resize2d(state, columns, channels * kernel_h * kernel_w, 1 * height_out * width_out);
THCudaDoubleTensor *input_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *offset_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *mask_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *output_n = THCudaDoubleTensor_new(state);
for (int b = 0; b < batch; b++)
{
THCudaDoubleTensor_select(state, input_n, input, 0, b);
THCudaDoubleTensor_select(state, offset_n, offset, 0, b);
THCudaDoubleTensor_select(state, mask_n, mask, 0, b);
THCudaDoubleTensor_select(state, output_n, output, 0, b);
// Do Bias first:
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
// (N x 1) (1 x M)
long m_ = channels_out;
long n_ = height_out * width_out;
long k_ = 1;
THCudaBlas_Dgemm(state, 't', 'n', n_, m_, k_, 1.0,
THCudaDoubleTensor_data(state, ones), k_,
THCudaDoubleTensor_data(state, bias), k_, 0.0,
THCudaDoubleTensor_data(state, output_n), n_);
modulated_deformable_im2col_cuda(THCState_getCurrentStream(state),
THCudaDoubleTensor_data(state, input_n), THCudaDoubleTensor_data(state, offset_n),
THCudaDoubleTensor_data(state, mask_n),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
deformable_group, THCudaDoubleTensor_data(state, columns));
//(k * m) x (m * n)
// Y = WC
long m = channels_out;
long n = height_out * width_out;
long k = channels * kernel_h * kernel_w;
THCudaBlas_Dgemm(state, 'n', 'n', n, m, k, 1.0f,
THCudaDoubleTensor_data(state, columns), n,
THCudaDoubleTensor_data(state, weight), k, 1.0f,
THCudaDoubleTensor_data(state, output_n), n);
}
THCudaDoubleTensor_free(state, input_n);
THCudaDoubleTensor_free(state, offset_n);
THCudaDoubleTensor_free(state, mask_n);
THCudaDoubleTensor_free(state, output_n);
THCudaDoubleTensor_free(state, input);
THCudaDoubleTensor_free(state, offset);
THCudaDoubleTensor_free(state, mask);
THCudaDoubleTensor_free(state, weight);
}
void dcn_v2_cuda_backward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight,
THCudaDoubleTensor *bias, THCudaDoubleTensor *ones,
THCudaDoubleTensor *offset, THCudaDoubleTensor *mask,
THCudaDoubleTensor *columns,
THCudaDoubleTensor *grad_input, THCudaDoubleTensor *grad_weight,
THCudaDoubleTensor *grad_bias, THCudaDoubleTensor *grad_offset,
THCudaDoubleTensor *grad_mask, THCudaDoubleTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group)
{
THCAssertSameGPU(THCudaDoubleTensor_checkGPU(state, 13, input, weight, bias, ones, offset, mask, columns,
grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output));
THArgCheck(THCudaDoubleTensor_isContiguous(state, input), 1, "input tensor has to be contiguous");
THArgCheck(THCudaDoubleTensor_isContiguous(state, weight), 2, "weight tensor has to be contiguous");
input = THCudaDoubleTensor_newContiguous(state, input);
offset = THCudaDoubleTensor_newContiguous(state, offset);
mask = THCudaDoubleTensor_newContiguous(state, mask);
weight = THCudaDoubleTensor_newContiguous(state, weight);
grad_output = THCudaDoubleTensor_newContiguous(state, grad_output);
const int batch = THCudaDoubleTensor_size(state, input, 0);
const int channels = THCudaDoubleTensor_size(state, input, 1);
const int height = THCudaDoubleTensor_size(state, input, 2);
const int width = THCudaDoubleTensor_size(state, input, 3);
const int channels_out = THCudaDoubleTensor_size(state, weight, 0);
const int channels_kernel = THCudaDoubleTensor_size(state, weight, 1);
const int kernel_h_ = THCudaDoubleTensor_size(state, weight, 2);
const int kernel_w_ = THCudaDoubleTensor_size(state, weight, 3);
if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
THError("Input shape and kernel shape wont match: (%d x %d vs %d x %d).",
kernel_h_, kernel_w, kernel_h_, kernel_w_);
if (channels != channels_kernel)
THError("Input shape and kernel channels wont match: (%d vs %d).",
channels, channels_kernel);
const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
if (THCudaDoubleTensor_nDimension(state, ones) != 2 ||
THCudaDoubleTensor_size(state, ones, 0) * THCudaDoubleTensor_size(state, ones, 1) < height_out * width_out)
{
// Resize plane and fill with ones...
THCudaDoubleTensor_resize2d(state, ones, height_out, width_out);
THCudaDoubleTensor_fill(state, ones, 1);
}
// THCudaDoubleTensor_resize4d(state, grad_input, batch, channels, height, width);
THCudaDoubleTensor_resize2d(state, columns, channels * kernel_h * kernel_w, height_out * width_out);
THCudaDoubleTensor *input_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *offset_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *mask_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *grad_output_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *grad_input_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *grad_offset_n = THCudaDoubleTensor_new(state);
THCudaDoubleTensor *grad_mask_n = THCudaDoubleTensor_new(state);
for (int b = 0; b < batch; b++)
{
THCudaDoubleTensor_select(state, input_n, input, 0, b);
THCudaDoubleTensor_select(state, offset_n, offset, 0, b);
THCudaDoubleTensor_select(state, mask_n, mask, 0, b);
THCudaDoubleTensor_select(state, grad_output_n, grad_output, 0, b);
THCudaDoubleTensor_select(state, grad_input_n, grad_input, 0, b);
THCudaDoubleTensor_select(state, grad_offset_n, grad_offset, 0, b);
THCudaDoubleTensor_select(state, grad_mask_n, grad_mask, 0, b);
long m = channels * kernel_h * kernel_w;
long n = height_out * width_out;
long k = channels_out;
THCudaBlas_Dgemm(state, 'n', 't', n, m, k, 1.0,
THCudaDoubleTensor_data(state, grad_output_n), n,
THCudaDoubleTensor_data(state, weight), m, 0.0,
THCudaDoubleTensor_data(state, columns), n);
// gradient w.r.t. input offset and mask data
modulated_deformable_col2im_coord_cuda(THCState_getCurrentStream(state),
THCudaDoubleTensor_data(state, columns),
THCudaDoubleTensor_data(state, input_n),
THCudaDoubleTensor_data(state, offset_n),
THCudaDoubleTensor_data(state, mask_n),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
THCudaDoubleTensor_data(state, grad_offset_n),
THCudaDoubleTensor_data(state, grad_mask_n));
// gradient w.r.t. input data
modulated_deformable_col2im_cuda(THCState_getCurrentStream(state),
THCudaDoubleTensor_data(state, columns),
THCudaDoubleTensor_data(state, offset_n),
THCudaDoubleTensor_data(state, mask_n),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
THCudaDoubleTensor_data(state, grad_input_n));
// gradient w.r.t. weight, dWeight should accumulate across the batch and group
modulated_deformable_im2col_cuda(THCState_getCurrentStream(state),
THCudaDoubleTensor_data(state, input_n),
THCudaDoubleTensor_data(state, offset_n),
THCudaDoubleTensor_data(state, mask_n),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
THCudaDoubleTensor_data(state, columns));
long m_ = channels_out;
long n_ = channels * kernel_h * kernel_w;
long k_ = height_out * width_out;
THCudaBlas_Dgemm(state, 't', 'n', n_, m_, k_, 1.0,
THCudaDoubleTensor_data(state, columns), k_,
THCudaDoubleTensor_data(state, grad_output_n), k_, 1.0,
THCudaDoubleTensor_data(state, grad_weight), n_);
// gradient w.r.t. bias
// long m_ = channels_out;
// long k__ = height_out * width_out;
THCudaBlas_Dgemv(state,
't',
k_, m_, 1.0,
THCudaDoubleTensor_data(state, grad_output_n), k_,
THCudaDoubleTensor_data(state, ones), 1, 1.0,
THCudaDoubleTensor_data(state, grad_bias), 1);
}
THCudaDoubleTensor_free(state, input_n);
THCudaDoubleTensor_free(state, offset_n);
THCudaDoubleTensor_free(state, mask_n);
THCudaDoubleTensor_free(state, grad_output_n);
THCudaDoubleTensor_free(state, grad_input_n);
THCudaDoubleTensor_free(state, grad_offset_n);
THCudaDoubleTensor_free(state, grad_mask_n);
THCudaDoubleTensor_free(state, input);
THCudaDoubleTensor_free(state, offset);
THCudaDoubleTensor_free(state, mask);
THCudaDoubleTensor_free(state, weight);
THCudaDoubleTensor_free(state, grad_output);
}
void dcn_v2_psroi_pooling_cuda_forward(THCudaDoubleTensor * input, THCudaDoubleTensor * bbox,
THCudaDoubleTensor * trans,
THCudaDoubleTensor * out, THCudaDoubleTensor * top_count,
const int no_trans,
const double spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const double trans_std)
{
THArgCheck(THCudaDoubleTensor_isContiguous(state, input), 1, "input tensor has to be contiguous");
THCAssertSameGPU(THCudaDoubleTensor_checkGPU(state, 5, input, bbox, trans, out, top_count));
const int batch = THCudaDoubleTensor_size(state, input, 0);
const int channels = THCudaDoubleTensor_size(state, input, 1);
const int height = THCudaDoubleTensor_size(state, input, 2);
const int width = THCudaDoubleTensor_size(state, input, 3);
const int channels_trans = no_trans? 2 : THCudaDoubleTensor_size(state, trans, 1);
const int num_bbox = THCudaDoubleTensor_size(state, bbox, 0);
if (num_bbox != THCudaDoubleTensor_size(state, out, 0))
THError("Output shape and bbox number wont match: (%d vs %d).",
THCudaDoubleTensor_size(state, out, 0), num_bbox);
DeformablePSROIPoolForward(THCState_getCurrentStream(state),
THCudaDoubleTensor_data(state, input),
THCudaDoubleTensor_data(state, bbox),
THCudaDoubleTensor_data(state, trans),
THCudaDoubleTensor_data(state, out),
THCudaDoubleTensor_data(state, top_count),
batch, channels, height, width,
num_bbox,
channels_trans,
no_trans,
spatial_scale,
output_dim,
group_size,
pooled_size,
part_size,
sample_per_part,
trans_std);
}
void dcn_v2_psroi_pooling_cuda_backward(THCudaDoubleTensor * out_grad,
THCudaDoubleTensor * input, THCudaDoubleTensor * bbox,
THCudaDoubleTensor * trans, THCudaDoubleTensor * top_count,
THCudaDoubleTensor * input_grad, THCudaDoubleTensor * trans_grad,
const int no_trans,
const double spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const double trans_std)
{
THArgCheck(THCudaDoubleTensor_isContiguous(state, out_grad), 0, "out_grad tensor has to be contiguous");
THArgCheck(THCudaDoubleTensor_isContiguous(state, input), 1, "input tensor has to be contiguous");
THCAssertSameGPU(THCudaDoubleTensor_checkGPU(state, 7, input, bbox, trans, out_grad, top_count,
input_grad, trans_grad));
const int batch = THCudaDoubleTensor_size(state, input, 0);
const int channels = THCudaDoubleTensor_size(state, input, 1);
const int height = THCudaDoubleTensor_size(state, input, 2);
const int width = THCudaDoubleTensor_size(state, input, 3);
const int channels_trans = no_trans? 2 : THCudaDoubleTensor_size(state, trans, 1);
const int num_bbox = THCudaDoubleTensor_size(state, bbox, 0);
if (num_bbox != THCudaDoubleTensor_size(state, out_grad, 0))
THError("Output shape and bbox number wont match: (%d vs %d).",
THCudaDoubleTensor_size(state, out_grad, 0), num_bbox);
DeformablePSROIPoolBackwardAcc(THCState_getCurrentStream(state),
THCudaDoubleTensor_data(state, out_grad),
THCudaDoubleTensor_data(state, input),
THCudaDoubleTensor_data(state, bbox),
THCudaDoubleTensor_data(state, trans),
THCudaDoubleTensor_data(state, top_count),
THCudaDoubleTensor_data(state, input_grad),
THCudaDoubleTensor_data(state, trans_grad),
batch, channels, height, width, num_bbox,
channels_trans,
no_trans,
spatial_scale,
output_dim,
group_size,
pooled_size,
part_size,
sample_per_part,
trans_std);
}
================================================
FILE: src/lib/models/networks/DCNv2/src/dcn_v2_cuda_double.h
================================================
// #ifndef DCN_V2_CUDA
// #define DCN_V2_CUDA
// #ifdef __cplusplus
// extern "C"
// {
// #endif
void dcn_v2_cuda_forward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight,
THCudaDoubleTensor *bias, THCudaDoubleTensor *ones,
THCudaDoubleTensor *offset, THCudaDoubleTensor *mask,
THCudaDoubleTensor *output, THCudaDoubleTensor *columns,
int kernel_h, int kernel_w,
const int stride_h, const int stride_w,
const int pad_h, const int pad_w,
const int dilation_h, const int dilation_w,
const int deformable_group);
void dcn_v2_cuda_backward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight,
THCudaDoubleTensor *bias, THCudaDoubleTensor *ones,
THCudaDoubleTensor *offset, THCudaDoubleTensor *mask,
THCudaDoubleTensor *columns,
THCudaDoubleTensor *grad_input, THCudaDoubleTensor *grad_weight,
THCudaDoubleTensor *grad_bias, THCudaDoubleTensor *grad_offset,
THCudaDoubleTensor *grad_mask, THCudaDoubleTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group);
void dcn_v2_psroi_pooling_cuda_forward(THCudaDoubleTensor * input, THCudaDoubleTensor * bbox,
THCudaDoubleTensor * trans,
THCudaDoubleTensor * out, THCudaDoubleTensor * top_count,
const int no_trans,
const double spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const double trans_std);
void dcn_v2_psroi_pooling_cuda_backward(THCudaDoubleTensor * out_grad,
THCudaDoubleTensor * input, THCudaDoubleTensor * bbox,
THCudaDoubleTensor * trans, THCudaDoubleTensor * top_count,
THCudaDoubleTensor * input_grad, THCudaDoubleTensor * trans_grad,
const int no_trans,
const double spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const double trans_std);
// #ifdef __cplusplus
// }
// #endif
// #endif
================================================
FILE: src/lib/models/networks/DCNv2/src/dcn_v2_double.c
================================================
#include |
#include
#include
void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight,
THDoubleTensor *bias, THDoubleTensor *ones,
THDoubleTensor *offset, THDoubleTensor *mask,
THDoubleTensor *output, THDoubleTensor *columns,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group)
{
printf("only implemented in GPU");
}
void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight,
THDoubleTensor *bias, THDoubleTensor *ones,
THDoubleTensor *offset, THDoubleTensor *mask,
THDoubleTensor *output, THDoubleTensor *columns,
THDoubleTensor *grad_input, THDoubleTensor *grad_weight,
THDoubleTensor *grad_bias, THDoubleTensor *grad_offset,
THDoubleTensor *grad_mask, THDoubleTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group)
{
printf("only implemented in GPU");
}
================================================
FILE: src/lib/models/networks/DCNv2/src/dcn_v2_double.h
================================================
void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight,
THDoubleTensor *bias, THDoubleTensor *ones,
THDoubleTensor *offset, THDoubleTensor *mask,
THDoubleTensor *output, THDoubleTensor *columns,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group);
void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight,
THDoubleTensor *bias, THDoubleTensor *ones,
THDoubleTensor *offset, THDoubleTensor *mask,
THDoubleTensor *output, THDoubleTensor *columns,
THDoubleTensor *grad_input, THDoubleTensor *grad_weight,
THDoubleTensor *grad_bias, THDoubleTensor *grad_offset,
THDoubleTensor *grad_mask, THDoubleTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group);
================================================
FILE: src/lib/models/networks/DCNv2/test.py
================================================
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import gradcheck
from dcn_v2 import DCNv2
from dcn_v2_func import DCNv2Function
from dcn_v2 import DCNv2Pooling
from dcn_v2_func import DCNv2PoolingFunction
deformable_groups = 1
N, inC, inH, inW = 2, 2, 4, 4
outC = 2
kH, kW = 3, 3
def conv_identify(weight, bias):
weight.data.zero_()
bias.data.zero_()
o, i, h, w = weight.shape
y = h//2
x = w//2
for p in range(i):
for q in range(o):
if p == q:
weight.data[q, p, y, x] = 1.0
def check_zero_offset():
conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW,
kernel_size=(kH, kW),
stride=(1, 1),
padding=(1, 1),
bias=True).cuda()
conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW,
kernel_size=(kH, kW),
stride=(1, 1),
padding=(1, 1),
bias=True).cuda()
dcn_v2 = DCNv2(inC, outC, (kH, kW),
stride=1, padding=1, dilation=1,
deformable_groups=deformable_groups).cuda()
conv_offset.weight.data.zero_()
conv_offset.bias.data.zero_()
conv_mask.weight.data.zero_()
conv_mask.bias.data.zero_()
conv_identify(dcn_v2.weight, dcn_v2.bias)
input = torch.randn(N, inC, inH, inW).cuda()
offset = conv_offset(input)
mask = conv_mask(input)
mask = torch.sigmoid(mask)
output = dcn_v2(input, offset, mask)
output *= 2
d = (input - output).abs().max()
if d < 1e-10:
print('Zero offset passed')
else:
print('Zero offset failed')
def check_gradient_dconv_double():
input = torch.randn(N, inC, inH, inW, dtype=torch.float64).cuda()
input.requires_grad = True
offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW, dtype=torch.float64).cuda()
# offset.data.zero_()
# offset.data -= 0.00001
offset.requires_grad = True
mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW, dtype=torch.float64).cuda()
# mask.data.zero_()
mask.requires_grad = True
mask = torch.sigmoid(mask)
weight = torch.randn(outC, inC, kH, kW, dtype=torch.float64).cuda()
weight.requires_grad = True
bias = torch.rand(outC, dtype=torch.float64).cuda()
bias.requires_grad = True
func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups)
print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-6, atol=1e-5, rtol=1e-3))
def check_gradient_dconv():
input = torch.randn(N, inC, inH, inW).cuda()
input.requires_grad = True
offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda()
# offset.data.zero_()
# offset.data -= 0.5
offset.requires_grad = True
mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda()
# mask.data.zero_()
mask.requires_grad = True
mask = torch.sigmoid(mask)
weight = torch.randn(outC, inC, kH, kW).cuda()
weight.requires_grad = True
bias = torch.rand(outC).cuda()
bias.requires_grad = True
func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups)
print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-3, atol=1e-3, rtol=1e-2))
def check_pooling_zero_offset():
from dcn_v2 import DCNv2Pooling
input = torch.randn(2, 16, 64, 64).cuda().zero_()
input[0, :, 16:26, 16:26] = 1.
input[1, :, 10:20, 20:30] = 2.
rois = torch.tensor([
[0, 65, 65, 103, 103],
[1, 81, 41, 119, 79],
]).cuda().float()
pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=16,
no_trans=True,
group_size=1,
trans_std=0.1).cuda()
out = pooling(input, rois, input.new())
s = ', '.join(['%f' % out[i, :, :, :].mean().item() for i in range(rois.shape[0])])
print(s)
dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=16,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
offset = torch.randn(20, 2, 7, 7).cuda().zero_()
dout = dpooling(input, rois, offset)
s = ', '.join(['%f' % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])])
print(s)
def check_gradient_dpooling():
input = torch.randn(2, 3, 5, 5).cuda() * 0.01
N = 4
batch_inds = torch.randint(2, (N, 1)).cuda().float()
x = torch.rand((N, 1)).cuda().float() * 15
y = torch.rand((N, 1)).cuda().float() * 15
w = torch.rand((N, 1)).cuda().float() * 10
h = torch.rand((N, 1)).cuda().float() * 10
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
offset = torch.randn(N, 2, 3, 3).cuda()
dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=3,
output_dim=3,
no_trans=False,
group_size=1,
trans_std=0.0).cuda()
input.requires_grad = True
offset.requires_grad = True
print('check_gradient_dpooling', gradcheck(dpooling, (input, rois, offset), eps=1e-4))
def example_dconv():
from dcn_v2 import DCN
input = torch.randn(2, 64, 128, 128).cuda()
# wrap all things (offset and mask) in DCN
dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
output = dcn(input)
targert = output.new(*output.size())
targert.data.uniform_(-0.01, 0.01)
error = (targert - output).mean()
error.backward()
print(output.shape)
def example_dpooling():
from dcn_v2 import DCNv2Pooling
input = torch.randn(2, 32, 64, 64).cuda()
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
offset = torch.randn(20, 2, 7, 7).cuda()
input.requires_grad = True
offset.requires_grad = True
# normal roi_align
pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=True,
group_size=1,
trans_std=0.1).cuda()
# deformable pooling
dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
out = pooling(input, rois, offset)
dout = dpooling(input, rois, offset)
print(out.shape)
print(dout.shape)
target_out = out.new(*out.size())
target_out.data.uniform_(-0.01, 0.01)
target_dout = dout.new(*dout.size())
target_dout.data.uniform_(-0.01, 0.01)
e = (target_out - out).mean()
e.backward()
e = (target_dout - dout).mean()
e.backward()
def example_mdpooling():
from dcn_v2 import DCNPooling
input = torch.randn(2, 32, 64, 64).cuda()
input.requires_grad = True
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
# mdformable pooling (V2)
dpooling = DCNPooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
dout = dpooling(input, rois)
target = dout.new(*dout.size())
target.data.uniform_(-0.1, 0.1)
error = (target - dout).mean()
error.backward()
print(dout.shape)
if __name__ == '__main__':
example_dconv()
example_dpooling()
example_mdpooling()
check_pooling_zero_offset()
# zero offset check
if inC == outC:
check_zero_offset()
check_gradient_dpooling()
# # gradient check
# try:
# check_gradient_double()
# except TypeError:
# print('''****** You can swith to double precision in dcn_v2_func.py by (un)commenting these two lines:
# ****** from _ext import dcn_v2 as _backend
# ****** from _ext import dcn_v2_double as _backend''')
# print('****** Your tensor may not be **double** type')
# print('****** Switching to **float** type')
#
# check_gradient()
# finally:
# print('****** Note: backward is not reentrant error may not be a serious problem, '
# '****** since the max error is less than 1e-7\n'
# '****** Still looking for what trigger this problem')
================================================
FILE: src/lib/models/networks/dlav0.py
================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from os.path import join
import torch
from torch import nn
import torch.utils.model_zoo as model_zoo
import numpy as np
BatchNorm = nn.BatchNorm2d
def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn1 = BatchNorm(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = BatchNorm(planes)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 2
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(Bottleneck, self).__init__()
expansion = Bottleneck.expansion
bottle_planes = planes // expansion
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = BatchNorm(bottle_planes)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = BatchNorm(bottle_planes)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = BatchNorm(planes)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class BottleneckX(nn.Module):
expansion = 2
cardinality = 32
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BottleneckX, self).__init__()
cardinality = BottleneckX.cardinality
# dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))
# bottle_planes = dim * cardinality
bottle_planes = planes * cardinality // 32
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = BatchNorm(bottle_planes)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation, bias=False,
dilation=dilation, groups=cardinality)
self.bn2 = BatchNorm(bottle_planes)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = BatchNorm(planes)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1,
stride=1, bias=False, padding=(kernel_size - 1) // 2)
self.bn = BatchNorm(out_channels)
self.relu = nn.ReLU(inplace=True)
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(torch.cat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class Tree(nn.Module):
def __init__(self, levels, block, in_channels, out_channels, stride=1,
level_root=False, root_dim=0, root_kernel_size=1,
dilation=1, root_residual=False):
super(Tree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride,
dilation=dilation)
self.tree2 = block(out_channels, out_channels, 1,
dilation=dilation)
else:
self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
stride, root_dim=0,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
root_dim=root_dim + out_channels,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
if levels == 1:
self.root = Root(root_dim, out_channels, root_kernel_size,
root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.downsample = None
self.project = None
self.levels = levels
if stride > 1:
self.downsample = nn.MaxPool2d(stride, stride=stride)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2d(in_channels, out_channels,
kernel_size=1, stride=1, bias=False),
BatchNorm(out_channels)
)
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x) if self.downsample else x
residual = self.project(bottom) if self.project else bottom
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
class DLA(nn.Module):
def __init__(self, levels, channels, num_classes=1000,
block=BasicBlock, residual_root=False, return_levels=False,
pool_size=7, linear_root=False):
super(DLA, self).__init__()
self.channels = channels
self.return_levels = return_levels
self.num_classes = num_classes
self.base_layer = nn.Sequential(
nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
padding=3, bias=False),
BatchNorm(channels[0]),
nn.ReLU(inplace=True))
self.level0 = self._make_conv_level(
channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(
channels[0], channels[1], levels[1], stride=2)
self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
level_root=False,
root_residual=residual_root)
self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
level_root=True, root_residual=residual_root)
self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
level_root=True, root_residual=residual_root)
self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
level_root=True, root_residual=residual_root)
self.avgpool = nn.AvgPool2d(pool_size)
self.fc = nn.Conv2d(channels[-1], num_classes, kernel_size=1,
stride=1, padding=0, bias=True)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, BatchNorm):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_level(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes:
downsample = nn.Sequential(
nn.MaxPool2d(stride, stride=stride),
nn.Conv2d(inplanes, planes,
kernel_size=1, stride=1, bias=False),
BatchNorm(planes),
)
layers = []
layers.append(block(inplanes, planes, stride, downsample=downsample))
for i in range(1, blocks):
layers.append(block(inplanes, planes))
return nn.Sequential(*layers)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias=False, dilation=dilation),
BatchNorm(planes),
nn.ReLU(inplace=True)])
inplanes = planes
return nn.Sequential(*modules)
def forward(self, x):
y = []
x = self.base_layer(x)
for i in range(6):
x = getattr(self, 'level{}'.format(i))(x)
y.append(x)
if self.return_levels:
return y
else:
x = self.avgpool(x)
x = self.fc(x)
x = x.view(x.size(0), -1)
return x
def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
fc = self.fc
if name.endswith('.pth'):
model_weights = torch.load(data + name)
else:
model_url = get_model_url(data, name, hash)
model_weights = model_zoo.load_url(model_url)
num_classes = len(model_weights[list(model_weights.keys())[-1]])
self.fc = nn.Conv2d(
self.channels[-1], num_classes,
kernel_size=1, stride=1, padding=0, bias=True)
self.load_state_dict(model_weights)
self.fc = fc
def dla34(pretrained, **kwargs): # DLA-34
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 128, 256, 512],
block=BasicBlock, **kwargs)
if pretrained:
model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
return model
def dla46_c(pretrained=None, **kwargs): # DLA-46-C
Bottleneck.expansion = 2
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 64, 128, 256],
block=Bottleneck, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla46_c')
return model
def dla46x_c(pretrained=None, **kwargs): # DLA-X-46-C
BottleneckX.expansion = 2
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 64, 128, 256],
block=BottleneckX, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla46x_c')
return model
def dla60x_c(pretrained, **kwargs): # DLA-X-60-C
BottleneckX.expansion = 2
model = DLA([1, 1, 1, 2, 3, 1],
[16, 32, 64, 64, 128, 256],
block=BottleneckX, **kwargs)
if pretrained:
model.load_pretrained_model(data='imagenet', name='dla60x_c', hash='b870c45c')
return model
def dla60(pretrained=None, **kwargs): # DLA-60
Bottleneck.expansion = 2
model = DLA([1, 1, 1, 2, 3, 1],
[16, 32, 128, 256, 512, 1024],
block=Bottleneck, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla60')
return model
def dla60x(pretrained=None, **kwargs): # DLA-X-60
BottleneckX.expansion = 2
model = DLA([1, 1, 1, 2, 3, 1],
[16, 32, 128, 256, 512, 1024],
block=BottleneckX, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla60x')
return model
def dla102(pretrained=None, **kwargs): # DLA-102
Bottleneck.expansion = 2
model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],
block=Bottleneck, residual_root=True, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla102')
return model
def dla102x(pretrained=None, **kwargs): # DLA-X-102
BottleneckX.expansion = 2
model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],
block=BottleneckX, residual_root=True, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla102x')
return model
def dla102x2(pretrained=None, **kwargs): # DLA-X-102 64
BottleneckX.cardinality = 64
model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],
block=BottleneckX, residual_root=True, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla102x2')
return model
def dla169(pretrained=None, **kwargs): # DLA-169
Bottleneck.expansion = 2
model = DLA([1, 1, 2, 3, 5, 1], [16, 32, 128, 256, 512, 1024],
block=Bottleneck, residual_root=True, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla169')
return model
def set_bn(bn):
global BatchNorm
BatchNorm = bn
dla.BatchNorm = bn
class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
class IDAUp(nn.Module):
def __init__(self, node_kernel, out_dim, channels, up_factors):
super(IDAUp, self).__init__()
self.channels = channels
self.out_dim = out_dim
for i, c in enumerate(channels):
if c == out_dim:
proj = Identity()
else:
proj = nn.Sequential(
nn.Conv2d(c, out_dim,
kernel_size=1, stride=1, bias=False),
BatchNorm(out_dim),
nn.ReLU(inplace=True))
f = int(up_factors[i])
if f == 1:
up = Identity()
else:
up = nn.ConvTranspose2d(
out_dim, out_dim, f * 2, stride=f, padding=f // 2,
output_padding=0, groups=out_dim, bias=False)
fill_up_weights(up)
setattr(self, 'proj_' + str(i), proj)
setattr(self, 'up_' + str(i), up)
for i in range(1, len(channels)):
node = nn.Sequential(
nn.Conv2d(out_dim * 2, out_dim,
kernel_size=node_kernel, stride=1,
padding=node_kernel // 2, bias=False),
BatchNorm(out_dim),
nn.ReLU(inplace=True))
setattr(self, 'node_' + str(i), node)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, BatchNorm):
m.weight.data.fill_(1)
m.bias.data.zero_()
def forward(self, layers):
assert len(self.channels) == len(layers), \
'{} vs {} layers'.format(len(self.channels), len(layers))
layers = list(layers)
for i, l in enumerate(layers):
upsample = getattr(self, 'up_' + str(i))
project = getattr(self, 'proj_' + str(i))
layers[i] = upsample(project(l))
x = layers[0]
y = []
for i in range(1, len(layers)):
node = getattr(self, 'node_' + str(i))
x = node(torch.cat([x, layers[i]], 1))
y.append(x)
return x, y
class DLAUp(nn.Module):
def __init__(self, channels, scales=(1, 2, 4, 8, 16), in_channels=None):
super(DLAUp, self).__init__()
if in_channels is None:
in_channels = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(self, 'ida_{}'.format(i),
IDAUp(3, channels[j], in_channels[j:],
scales[j:] // scales[j]))
scales[j + 1:] = scales[j]
in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, layers):
layers = list(layers)
assert len(layers) > 1
for i in range(len(layers) - 1):
ida = getattr(self, 'ida_{}'.format(i))
x, y = ida(layers[-i - 2:])
layers[-i - 1:] = y
return x
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
# torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
# torch.nn.init.xavier_normal_(m.weight.data)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class DLASeg(nn.Module):
def __init__(self, base_name, heads,
pretrained=True, down_ratio=4, head_conv=256):
super(DLASeg, self).__init__()
assert down_ratio in [2, 4, 8, 16]
self.heads = heads
self.first_level = int(np.log2(down_ratio))
self.base = globals()[base_name](
pretrained=pretrained, return_levels=True)
channels = self.base.channels
scales = [2 ** i for i in range(len(channels[self.first_level:]))]
self.dla_up = DLAUp(channels[self.first_level:], scales=scales)
'''
self.fc = nn.Sequential(
nn.Conv2d(channels[self.first_level], classes, kernel_size=1,
stride=1, padding=0, bias=True)
)
'''
for head in self.heads:
classes = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(channels[self.first_level], head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, classes,
kernel_size=1, stride=1,
padding=0, bias=True))
if 'hm' in head:
fc[-1].bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
else:
fc = nn.Conv2d(channels[self.first_level], classes,
kernel_size=1, stride=1,
padding=0, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
self.__setattr__(head, fc)
'''
up_factor = 2 ** self.first_level
if up_factor > 1:
up = nn.ConvTranspose2d(classes, classes, up_factor * 2,
stride=up_factor, padding=up_factor // 2,
output_padding=0, groups=classes,
bias=False)
fill_up_weights(up)
up.weight.requires_grad = False
else:
up = Identity()
self.up = up
self.softmax = nn.LogSoftmax(dim=1)
for m in self.fc.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, BatchNorm):
m.weight.data.fill_(1)
m.bias.data.zero_()
'''
def forward(self, x):
x = self.base(x)
x = self.dla_up(x[self.first_level:])
# x = self.fc(x)
# y = self.softmax(self.up(x))
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
'''
def optim_parameters(self, memo=None):
for param in self.base.parameters():
yield param
for param in self.dla_up.parameters():
yield param
for param in self.fc.parameters():
yield param
'''
'''
def dla34up(classes, pretrained_base=None, **kwargs):
model = DLASeg('dla34', classes, pretrained_base=pretrained_base, **kwargs)
return model
def dla60up(classes, pretrained_base=None, **kwargs):
model = DLASeg('dla60', classes, pretrained_base=pretrained_base, **kwargs)
return model
def dla102up(classes, pretrained_base=None, **kwargs):
model = DLASeg('dla102', classes,
pretrained_base=pretrained_base, **kwargs)
return model
def dla169up(classes, pretrained_base=None, **kwargs):
model = DLASeg('dla169', classes,
pretrained_base=pretrained_base, **kwargs)
return model
'''
def get_pose_net(num_layers, heads, head_conv=256, down_ratio=4):
model = DLASeg('dla{}'.format(num_layers), heads,
pretrained=True,
down_ratio=down_ratio,
head_conv=head_conv)
return model
================================================
FILE: src/lib/models/networks/large_hourglass.py
================================================
# ------------------------------------------------------------------------------
# This code is base on
# CornerNet (https://github.com/princeton-vl/CornerNet)
# Copyright (c) 2018, University of Michigan
# Licensed under the BSD 3-Clause License
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch
import torch.nn as nn
class convolution(nn.Module):
def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
super(convolution, self).__init__()
pad = (k - 1) // 2
self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
conv = self.conv(x)
bn = self.bn(conv)
relu = self.relu(bn)
return relu
class fully_connected(nn.Module):
def __init__(self, inp_dim, out_dim, with_bn=True):
super(fully_connected, self).__init__()
self.with_bn = with_bn
self.linear = nn.Linear(inp_dim, out_dim)
if self.with_bn:
self.bn = nn.BatchNorm1d(out_dim)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
linear = self.linear(x)
bn = self.bn(linear) if self.with_bn else linear
relu = self.relu(bn)
return relu
class residual(nn.Module):
def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
super(residual, self).__init__()
self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)
self.bn1 = nn.BatchNorm2d(out_dim)
self.relu1 = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
self.bn2 = nn.BatchNorm2d(out_dim)
self.skip = nn.Sequential(
nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
nn.BatchNorm2d(out_dim)
) if stride != 1 or inp_dim != out_dim else nn.Sequential()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
conv1 = self.conv1(x)
bn1 = self.bn1(conv1)
relu1 = self.relu1(bn1)
conv2 = self.conv2(relu1)
bn2 = self.bn2(conv2)
skip = self.skip(x)
return self.relu(bn2 + skip)
def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
layers = [layer(k, inp_dim, out_dim, **kwargs)]
for _ in range(1, modules):
layers.append(layer(k, out_dim, out_dim, **kwargs))
return nn.Sequential(*layers)
def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
layers = []
for _ in range(modules - 1):
layers.append(layer(k, inp_dim, inp_dim, **kwargs))
layers.append(layer(k, inp_dim, out_dim, **kwargs))
return nn.Sequential(*layers)
class MergeUp(nn.Module):
def forward(self, up1, up2):
return up1 + up2
def make_merge_layer(dim):
return MergeUp()
# def make_pool_layer(dim):
# return nn.MaxPool2d(kernel_size=2, stride=2)
def make_pool_layer(dim):
return nn.Sequential()
def make_unpool_layer(dim):
return nn.Upsample(scale_factor=2)
def make_kp_layer(cnv_dim, curr_dim, out_dim):
return nn.Sequential(
convolution(3, cnv_dim, curr_dim, with_bn=False),
nn.Conv2d(curr_dim, out_dim, (1, 1))
)
def make_inter_layer(dim):
return residual(3, dim, dim)
def make_cnv_layer(inp_dim, out_dim):
return convolution(3, inp_dim, out_dim)
class kp_module(nn.Module):
def __init__(
self, n, dims, modules, layer=residual,
make_up_layer=make_layer, make_low_layer=make_layer,
make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer, **kwargs
):
super(kp_module, self).__init__()
self.n = n
curr_mod = modules[0]
next_mod = modules[1]
curr_dim = dims[0]
next_dim = dims[1]
self.up1 = make_up_layer(
3, curr_dim, curr_dim, curr_mod,
layer=layer, **kwargs
)
self.max1 = make_pool_layer(curr_dim)
self.low1 = make_hg_layer(
3, curr_dim, next_dim, curr_mod,
layer=layer, **kwargs
)
self.low2 = kp_module(
n - 1, dims[1:], modules[1:], layer=layer,
make_up_layer=make_up_layer,
make_low_layer=make_low_layer,
make_hg_layer=make_hg_layer,
make_hg_layer_revr=make_hg_layer_revr,
make_pool_layer=make_pool_layer,
make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer,
**kwargs
) if self.n > 1 else \
make_low_layer(
3, next_dim, next_dim, next_mod,
layer=layer, **kwargs
)
self.low3 = make_hg_layer_revr(
3, next_dim, curr_dim, curr_mod,
layer=layer, **kwargs
)
self.up2 = make_unpool_layer(curr_dim)
self.merge = make_merge_layer(curr_dim)
def forward(self, x):
up1 = self.up1(x)
max1 = self.max1(x)
low1 = self.low1(max1)
low2 = self.low2(low1)
low3 = self.low3(low2)
up2 = self.up2(low3)
return self.merge(up1, up2)
class exkp(nn.Module):
def __init__(
self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256,
make_tl_layer=None, make_br_layer=None,
make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer,
make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer,
make_up_layer=make_layer, make_low_layer=make_layer,
make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer,
kp_layer=residual
):
super(exkp, self).__init__()
self.nstack = nstack
self.heads = heads
curr_dim = dims[0]
self.pre = nn.Sequential(
convolution(7, 3, 128, stride=2),
residual(3, 128, 256, stride=2)
) if pre is None else pre
self.kps = nn.ModuleList([
kp_module(
n, dims, modules, layer=kp_layer,
make_up_layer=make_up_layer,
make_low_layer=make_low_layer,
make_hg_layer=make_hg_layer,
make_hg_layer_revr=make_hg_layer_revr,
make_pool_layer=make_pool_layer,
make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer
) for _ in range(nstack)
])
self.cnvs = nn.ModuleList([
make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)
])
self.inters = nn.ModuleList([
make_inter_layer(curr_dim) for _ in range(nstack - 1)
])
self.inters_ = nn.ModuleList([
nn.Sequential(
nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
nn.BatchNorm2d(curr_dim)
) for _ in range(nstack - 1)
])
self.cnvs_ = nn.ModuleList([
nn.Sequential(
nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False),
nn.BatchNorm2d(curr_dim)
) for _ in range(nstack - 1)
])
## keypoint heatmaps
for head in heads.keys():
if 'hm' in head:
module = nn.ModuleList([
make_heat_layer(
cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
])
self.__setattr__(head, module)
for heat in self.__getattr__(head):
heat[-1].bias.data.fill_(-2.19)
else:
module = nn.ModuleList([
make_regr_layer(
cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
])
self.__setattr__(head, module)
self.relu = nn.ReLU(inplace=True)
def forward(self, image):
# print('image shape', image.shape)
inter = self.pre(image)
outs = []
for ind in range(self.nstack):
kp_, cnv_ = self.kps[ind], self.cnvs[ind]
kp = kp_(inter)
cnv = cnv_(kp)
out = {}
for head in self.heads:
layer = self.__getattr__(head)[ind]
y = layer(cnv)
out[head] = y
outs.append(out)
if ind < self.nstack - 1:
inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
inter = self.relu(inter)
inter = self.inters[ind](inter)
return outs
def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
layers = [layer(kernel, dim0, dim1, stride=2)]
layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)]
return nn.Sequential(*layers)
class HourglassNet(exkp):
def __init__(self, heads, num_stacks=2):
n = 5
dims = [256, 256, 384, 384, 384, 512]
modules = [2, 2, 2, 2, 2, 4]
super(HourglassNet, self).__init__(
n, num_stacks, dims, modules, heads,
make_tl_layer=None,
make_br_layer=None,
make_pool_layer=make_pool_layer,
make_hg_layer=make_hg_layer,
kp_layer=residual, cnv_dim=256
)
def get_large_hourglass_net(num_layers, heads, head_conv):
model = HourglassNet(heads, 2)
return model
================================================
FILE: src/lib/models/networks/msra_resnet.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Xingyi Zhou
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
BN_MOMENTUM = 0.1
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class PoseResNet(nn.Module):
def __init__(self, block, layers, heads, head_conv, **kwargs):
self.inplanes = 64
self.deconv_with_bias = False
self.heads = heads
super(PoseResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
# used for deconv layers
self.deconv_layers = self._make_deconv_layer(
3,
[256, 256, 256],
[4, 4, 4],
)
# self.final_layer = []
for head in sorted(self.heads):
num_output = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(256, head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, num_output,
kernel_size=1, stride=1, padding=0))
else:
fc = nn.Conv2d(
in_channels=256,
out_channels=num_output,
kernel_size=1,
stride=1,
padding=0
)
self.__setattr__(head, fc)
# self.final_layer = nn.ModuleList(self.final_layer)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _get_deconv_cfg(self, deconv_kernel, index):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
elif deconv_kernel == 2:
padding = 0
output_padding = 0
return deconv_kernel, padding, output_padding
def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
assert num_layers == len(num_filters), \
'ERROR: num_deconv_layers is different len(num_deconv_filters)'
assert num_layers == len(num_kernels), \
'ERROR: num_deconv_layers is different len(num_deconv_filters)'
layers = []
for i in range(num_layers):
kernel, padding, output_padding = \
self._get_deconv_cfg(num_kernels[i], i)
planes = num_filters[i]
layers.append(
nn.ConvTranspose2d(
in_channels=self.inplanes,
out_channels=planes,
kernel_size=kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=self.deconv_with_bias))
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
self.inplanes = planes
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.deconv_layers(x)
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
def init_weights(self, num_layers, pretrained=True):
if pretrained:
# print('=> init resnet deconv weights from normal distribution')
for _, m in self.deconv_layers.named_modules():
if isinstance(m, nn.ConvTranspose2d):
# print('=> init {}.weight as normal(0, 0.001)'.format(name))
# print('=> init {}.bias as 0'.format(name))
nn.init.normal_(m.weight, std=0.001)
if self.deconv_with_bias:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
# print('=> init {}.weight as 1'.format(name))
# print('=> init {}.bias as 0'.format(name))
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# print('=> init final conv weights from normal distribution')
for head in self.heads:
final_layer = self.__getattr__(head)
for i, m in enumerate(final_layer.modules()):
if isinstance(m, nn.Conv2d):
# nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
# print('=> init {}.weight as normal(0, 0.001)'.format(name))
# print('=> init {}.bias as 0'.format(name))
if m.weight.shape[0] == self.heads[head]:
if 'hm' in head:
nn.init.constant_(m.bias, -2.19)
else:
nn.init.normal_(m.weight, std=0.001)
nn.init.constant_(m.bias, 0)
#pretrained_state_dict = torch.load(pretrained)
url = model_urls['resnet{}'.format(num_layers)]
pretrained_state_dict = model_zoo.load_url(url)
print('=> loading pretrained model {}'.format(url))
self.load_state_dict(pretrained_state_dict, strict=False)
else:
print('=> imagenet pretrained model dose not exist')
print('=> please download it first')
raise ValueError('imagenet pretrained model does not exist')
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv):
block_class, layers = resnet_spec[num_layers]
model = PoseResNet(block_class, layers, heads, head_conv=head_conv)
model.init_weights(num_layers, pretrained=True)
return model
================================================
FILE: src/lib/models/networks/pose_dla_dcn.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import logging
import numpy as np
from os.path import join
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from .DCNv2.dcn_v2 import DCN
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 2
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(Bottleneck, self).__init__()
expansion = Bottleneck.expansion
bottle_planes = planes // expansion
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class BottleneckX(nn.Module):
expansion = 2
cardinality = 32
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BottleneckX, self).__init__()
cardinality = BottleneckX.cardinality
# dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))
# bottle_planes = dim * cardinality
bottle_planes = planes * cardinality // 32
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation, bias=False,
dilation=dilation, groups=cardinality)
self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1,
stride=1, bias=False, padding=(kernel_size - 1) // 2)
self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(torch.cat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class Tree(nn.Module):
def __init__(self, levels, block, in_channels, out_channels, stride=1,
level_root=False, root_dim=0, root_kernel_size=1,
dilation=1, root_residual=False):
super(Tree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride,
dilation=dilation)
self.tree2 = block(out_channels, out_channels, 1,
dilation=dilation)
else:
self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
stride, root_dim=0,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
root_dim=root_dim + out_channels,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
if levels == 1:
self.root = Root(root_dim, out_channels, root_kernel_size,
root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.downsample = None
self.project = None
self.levels = levels
if stride > 1:
self.downsample = nn.MaxPool2d(stride, stride=stride)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2d(in_channels, out_channels,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
)
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x) if self.downsample else x
residual = self.project(bottom) if self.project else bottom
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
class DLA(nn.Module):
def __init__(self, levels, channels, num_classes=1000,
block=BasicBlock, residual_root=False, linear_root=False):
super(DLA, self).__init__()
self.channels = channels
self.num_classes = num_classes
self.base_layer = nn.Sequential(
nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
padding=3, bias=False),
nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),
nn.ReLU(inplace=True))
self.level0 = self._make_conv_level(
channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(
channels[0], channels[1], levels[1], stride=2)
self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
level_root=False,
root_residual=residual_root)
self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
level_root=True, root_residual=residual_root)
self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
level_root=True, root_residual=residual_root)
self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
level_root=True, root_residual=residual_root)
# for m in self.modules():
# if isinstance(m, nn.Conv2d):
# n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
# m.weight.data.normal_(0, math.sqrt(2. / n))
# elif isinstance(m, nn.BatchNorm2d):
# m.weight.data.fill_(1)
# m.bias.data.zero_()
def _make_level(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes:
downsample = nn.Sequential(
nn.MaxPool2d(stride, stride=stride),
nn.Conv2d(inplanes, planes,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(inplanes, planes, stride, downsample=downsample))
for i in range(1, blocks):
layers.append(block(inplanes, planes))
return nn.Sequential(*layers)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias=False, dilation=dilation),
nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)])
inplanes = planes
return nn.Sequential(*modules)
def forward(self, x):
y = []
x = self.base_layer(x)
for i in range(6):
x = getattr(self, 'level{}'.format(i))(x)
y.append(x)
return y
def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
# fc = self.fc
if name.endswith('.pth'):
model_weights = torch.load(data + name)
else:
model_url = get_model_url(data, name, hash)
model_weights = model_zoo.load_url(model_url)
num_classes = len(model_weights[list(model_weights.keys())[-1]])
self.fc = nn.Conv2d(
self.channels[-1], num_classes,
kernel_size=1, stride=1, padding=0, bias=True)
self.load_state_dict(model_weights)
# self.fc = fc
def dla34(pretrained=True, **kwargs): # DLA-34
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 128, 256, 512],
block=BasicBlock, **kwargs)
if pretrained:
model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
return model
class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
class DeformConv(nn.Module):
def __init__(self, chi, cho):
super(DeformConv, self).__init__()
self.actf = nn.Sequential(
nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)
)
self.conv = DCN(chi, cho, kernel_size=(3,3), stride=1, padding=1, dilation=1, deformable_groups=1)
def forward(self, x):
x = self.conv(x)
x = self.actf(x)
return x
class IDAUp(nn.Module):
def __init__(self, o, channels, up_f):
super(IDAUp, self).__init__()
for i in range(1, len(channels)):
c = channels[i]
f = int(up_f[i])
proj = DeformConv(c, o)
node = DeformConv(o, o)
up = nn.ConvTranspose2d(o, o, f * 2, stride=f,
padding=f // 2, output_padding=0,
groups=o, bias=False)
fill_up_weights(up)
setattr(self, 'proj_' + str(i), proj)
setattr(self, 'up_' + str(i), up)
setattr(self, 'node_' + str(i), node)
def forward(self, layers, startp, endp):
for i in range(startp + 1, endp):
upsample = getattr(self, 'up_' + str(i - startp))
project = getattr(self, 'proj_' + str(i - startp))
layers[i] = upsample(project(layers[i]))
node = getattr(self, 'node_' + str(i - startp))
layers[i] = node(layers[i] + layers[i - 1])
class DLAUp(nn.Module):
def __init__(self, startp, channels, scales, in_channels=None):
super(DLAUp, self).__init__()
self.startp = startp
if in_channels is None:
in_channels = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(self, 'ida_{}'.format(i),
IDAUp(channels[j], in_channels[j:],
scales[j:] // scales[j]))
scales[j + 1:] = scales[j]
in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, layers):
out = [layers[-1]] # start with 32
for i in range(len(layers) - self.startp - 1):
ida = getattr(self, 'ida_{}'.format(i))
ida(layers, len(layers) -i - 2, len(layers))
out.insert(0, layers[-1])
return out
class Interpolate(nn.Module):
def __init__(self, scale, mode):
super(Interpolate, self).__init__()
self.scale = scale
self.mode = mode
def forward(self, x):
x = F.interpolate(x, scale_factor=self.scale, mode=self.mode, align_corners=False)
return x
class DLASeg(nn.Module):
def __init__(self, base_name, heads, pretrained, down_ratio, final_kernel,
last_level, head_conv, out_channel=0):
super(DLASeg, self).__init__()
assert down_ratio in [2, 4, 8, 16]
self.first_level = int(np.log2(down_ratio))
self.last_level = last_level
self.base = globals()[base_name](pretrained=pretrained)
channels = self.base.channels
scales = [2 ** i for i in range(len(channels[self.first_level:]))]
self.dla_up = DLAUp(self.first_level, channels[self.first_level:], scales)
if out_channel == 0:
out_channel = channels[self.first_level]
self.ida_up = IDAUp(out_channel, channels[self.first_level:self.last_level],
[2 ** i for i in range(self.last_level - self.first_level)])
self.heads = heads
for head in self.heads:
classes = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(channels[self.first_level], head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, classes,
kernel_size=final_kernel, stride=1,
padding=final_kernel // 2, bias=True))
if 'hm' in head:
fc[-1].bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
else:
fc = nn.Conv2d(channels[self.first_level], classes,
kernel_size=final_kernel, stride=1,
padding=final_kernel // 2, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
self.__setattr__(head, fc)
def forward(self, x):
x = self.base(x)
x = self.dla_up(x)
y = []
for i in range(self.last_level - self.first_level):
y.append(x[i].clone())
self.ida_up(y, 0, len(y))
z = {}
for head in self.heads:
z[head] = self.__getattr__(head)(y[-1])
return [z]
def get_pose_net(num_layers, heads, head_conv=256, down_ratio=4):
model = DLASeg('dla{}'.format(num_layers), heads,
pretrained=True,
down_ratio=down_ratio,
final_kernel=1,
last_level=5,
head_conv=head_conv)
return model
================================================
FILE: src/lib/models/networks/resnet_dcn.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Dequan Wang and Xingyi Zhou
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import logging
import torch
import torch.nn as nn
from .DCNv2.dcn_v2 import DCN
import torch.utils.model_zoo as model_zoo
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
# torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
# torch.nn.init.xavier_normal_(m.weight.data)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class PoseResNet(nn.Module):
def __init__(self, block, layers, heads, head_conv):
self.inplanes = 64
self.heads = heads
self.deconv_with_bias = False
super(PoseResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
# used for deconv layers
self.deconv_layers = self._make_deconv_layer(
3,
[256, 128, 64],
[4, 4, 4],
)
for head in self.heads:
classes = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(64, head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, classes,
kernel_size=1, stride=1,
padding=0, bias=True))
if 'hm' in head:
fc[-1].bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
else:
fc = nn.Conv2d(64, classes,
kernel_size=1, stride=1,
padding=0, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
self.__setattr__(head, fc)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _get_deconv_cfg(self, deconv_kernel, index):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
elif deconv_kernel == 2:
padding = 0
output_padding = 0
return deconv_kernel, padding, output_padding
def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
assert num_layers == len(num_filters), \
'ERROR: num_deconv_layers is different len(num_deconv_filters)'
assert num_layers == len(num_kernels), \
'ERROR: num_deconv_layers is different len(num_deconv_filters)'
layers = []
for i in range(num_layers):
kernel, padding, output_padding = \
self._get_deconv_cfg(num_kernels[i], i)
planes = num_filters[i]
fc = DCN(self.inplanes, planes,
kernel_size=(3,3), stride=1,
padding=1, dilation=1, deformable_groups=1)
# fc = nn.Conv2d(self.inplanes, planes,
# kernel_size=3, stride=1,
# padding=1, dilation=1, bias=False)
# fill_fc_weights(fc)
up = nn.ConvTranspose2d(
in_channels=planes,
out_channels=planes,
kernel_size=kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=self.deconv_with_bias)
fill_up_weights(up)
layers.append(fc)
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
layers.append(up)
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
self.inplanes = planes
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.deconv_layers(x)
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
def init_weights(self, num_layers):
if 1:
url = model_urls['resnet{}'.format(num_layers)]
pretrained_state_dict = model_zoo.load_url(url)
print('=> loading pretrained model {}'.format(url))
self.load_state_dict(pretrained_state_dict, strict=False)
print('=> init deconv weights from normal distribution')
for name, m in self.deconv_layers.named_modules():
if isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv=256):
block_class, layers = resnet_spec[num_layers]
model = PoseResNet(block_class, layers, heads, head_conv=head_conv)
model.init_weights(num_layers)
return model
================================================
FILE: src/lib/models/scatter_gather.py
================================================
import torch
from torch.autograd import Variable
from torch.nn.parallel._functions import Scatter, Gather
def scatter(inputs, target_gpus, dim=0, chunk_sizes=None):
r"""
Slices variables into approximately equal chunks and
distributes them across given GPUs. Duplicates
references to objects that are not variables. Does not
support Tensors.
"""
def scatter_map(obj):
if isinstance(obj, Variable):
return Scatter.apply(target_gpus, chunk_sizes, dim, obj)
assert not torch.is_tensor(obj), "Tensors not supported in scatter."
if isinstance(obj, tuple):
return list(zip(*map(scatter_map, obj)))
if isinstance(obj, list):
return list(map(list, zip(*map(scatter_map, obj))))
if isinstance(obj, dict):
return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
return [obj for targets in target_gpus]
return scatter_map(inputs)
def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None):
r"""Scatter with support for kwargs dictionary"""
inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else []
kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else []
if len(inputs) < len(kwargs):
inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
elif len(kwargs) < len(inputs):
kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
inputs = tuple(inputs)
kwargs = tuple(kwargs)
return inputs, kwargs
================================================
FILE: src/lib/models/utils.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
def _sigmoid(x):
y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
return y
def _gather_feat(feat, ind, mask=None):
dim = feat.size(2)
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
feat = feat.gather(1, ind)
if mask is not None:
mask = mask.unsqueeze(2).expand_as(feat)
feat = feat[mask]
feat = feat.view(-1, dim)
return feat
def _transpose_and_gather_feat(feat, ind):
feat = feat.permute(0, 2, 3, 1).contiguous()
feat = feat.view(feat.size(0), -1, feat.size(3))
feat = _gather_feat(feat, ind)
return feat
def flip_tensor(x):
return torch.flip(x, [3])
# tmp = x.detach().cpu().numpy()[..., ::-1].copy()
# return torch.from_numpy(tmp).to(x.device)
def flip_lr(x, flip_idx):
tmp = x.detach().cpu().numpy()[..., ::-1].copy()
shape = tmp.shape
for e in flip_idx:
tmp[:, e[0], ...], tmp[:, e[1], ...] = \
tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
return torch.from_numpy(tmp.reshape(shape)).to(x.device)
def flip_lr_off(x, flip_idx):
tmp = x.detach().cpu().numpy()[..., ::-1].copy()
shape = tmp.shape
tmp = tmp.reshape(tmp.shape[0], 17, 2,
tmp.shape[2], tmp.shape[3])
tmp[:, :, 0, :, :] *= -1
for e in flip_idx:
tmp[:, e[0], ...], tmp[:, e[1], ...] = \
tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
return torch.from_numpy(tmp.reshape(shape)).to(x.device)
================================================
FILE: src/lib/opts.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
class opts(object):
def __init__(self):
self.parser = argparse.ArgumentParser()
# basic experiment setting
self.parser.add_argument('task', default='ctdet',
help='ctdet | ddd | multi_pose | exdet')
self.parser.add_argument('--dataset', default='coco',
help='coco | kitti | coco_hp | pascal')
self.parser.add_argument('--exp_id', default='default')
self.parser.add_argument('--test', action='store_true')
self.parser.add_argument('--debug', type=int, default=0,
help='level of visualization.'
'1: only show the final detection results'
'2: show the network output features'
'3: use matplot to display' # useful when lunching training with ipython notebook
'4: save all visualizations to disk')
self.parser.add_argument('--demo', default='',
help='path to image/ image folders/ video. '
'or "webcam"')
self.parser.add_argument('--load_model', default='',
help='path to pretrained model')
self.parser.add_argument('--resume', action='store_true',
help='resume an experiment. '
'Reloaded the optimizer parameter and '
'set load_model to model_last.pth '
'in the exp dir if load_model is empty.')
# system
self.parser.add_argument('--gpus', default='0',
help='-1 for CPU, use comma for multiple gpus')
self.parser.add_argument('--num_workers', type=int, default=4,
help='dataloader threads. 0 for single-thread.')
self.parser.add_argument('--not_cuda_benchmark', action='store_true',
help='disable when the input size is not fixed.')
self.parser.add_argument('--seed', type=int, default=317,
help='random seed') # from CornerNet
# log
self.parser.add_argument('--print_iter', type=int, default=0,
help='disable progress bar and print to screen.')
self.parser.add_argument('--hide_data_time', action='store_true',
help='not display time during training.')
self.parser.add_argument('--save_all', action='store_true',
help='save model to disk every 5 epochs.')
self.parser.add_argument('--metric', default='loss',
help='main metric to save best model')
self.parser.add_argument('--vis_thresh', type=float, default=0.3,
help='visualization threshold.')
self.parser.add_argument('--debugger_theme', default='white',
choices=['white', 'black'])
# model
self.parser.add_argument('--arch', default='dla_34',
help='model architecture. Currently tested'
'res_18 | res_101 | resdcn_18 | resdcn_101 |'
'dlav0_34 | dla_34 | hourglass')
self.parser.add_argument('--head_conv', type=int, default=-1,
help='conv layer channels for output head'
'0 for no conv layer'
'-1 for default setting: '
'64 for resnets and 256 for dla.')
self.parser.add_argument('--down_ratio', type=int, default=4,
help='output stride. Currently only supports 4.')
# input
self.parser.add_argument('--input_res', type=int, default=-1,
help='input height and width. -1 for default from '
'dataset. Will be overriden by input_h | input_w')
self.parser.add_argument('--input_h', type=int, default=-1,
help='input height. -1 for default from dataset.')
self.parser.add_argument('--input_w', type=int, default=-1,
help='input width. -1 for default from dataset.')
# train
self.parser.add_argument('--lr', type=float, default=1.25e-4,
help='learning rate for batch size 32.')
self.parser.add_argument('--lr_step', type=str, default='90,120',
help='drop learning rate by 10.')
self.parser.add_argument('--num_epochs', type=int, default=140,
help='total training epochs.')
self.parser.add_argument('--batch_size', type=int, default=32,
help='batch size')
self.parser.add_argument('--master_batch_size', type=int, default=-1,
help='batch size on the master gpu.')
self.parser.add_argument('--num_iters', type=int, default=-1,
help='default: #samples / batch_size.')
self.parser.add_argument('--val_intervals', type=int, default=5,
help='number of epochs to run validation.')
self.parser.add_argument('--trainval', action='store_true',
help='include validation in training and '
'test on test set')
# test
self.parser.add_argument('--flip_test', action='store_true',
help='flip data augmentation.')
self.parser.add_argument('--test_scales', type=str, default='1',
help='multi scale test augmentation.')
self.parser.add_argument('--nms', action='store_true',
help='run nms in testing.')
self.parser.add_argument('--K', type=int, default=100,
help='max number of output objects.')
self.parser.add_argument('--not_prefetch_test', action='store_true',
help='not use parallal data pre-processing.')
self.parser.add_argument('--fix_res', action='store_true',
help='fix testing resolution or keep '
'the original resolution')
self.parser.add_argument('--keep_res', action='store_true',
help='keep the original resolution'
' during validation.')
# dataset
self.parser.add_argument('--not_rand_crop', action='store_true',
help='not use the random crop data augmentation'
'from CornerNet.')
self.parser.add_argument('--shift', type=float, default=0.1,
help='when not using random crop'
'apply shift augmentation.')
self.parser.add_argument('--scale', type=float, default=0.4,
help='when not using random crop'
'apply scale augmentation.')
self.parser.add_argument('--rotate', type=float, default=0,
help='when not using random crop'
'apply rotation augmentation.')
self.parser.add_argument('--flip', type = float, default=0.5,
help='probability of applying flip augmentation.')
self.parser.add_argument('--no_color_aug', action='store_true',
help='not use the color augmenation '
'from CornerNet')
# multi_pose
self.parser.add_argument('--aug_rot', type=float, default=0,
help='probability of applying '
'rotation augmentation.')
# ddd
self.parser.add_argument('--aug_ddd', type=float, default=0.5,
help='probability of applying crop augmentation.')
self.parser.add_argument('--rect_mask', action='store_true',
help='for ignored object, apply mask on the '
'rectangular region or just center point.')
self.parser.add_argument('--kitti_split', default='3dop',
help='different validation split for kitti: '
'3dop | subcnn')
# loss
self.parser.add_argument('--mse_loss', action='store_true',
help='use mse loss or focal loss to train '
'keypoint heatmaps.')
# ctdet
self.parser.add_argument('--reg_loss', default='l1',
help='regression loss: sl1 | l1 | l2')
self.parser.add_argument('--hm_weight', type=float, default=1,
help='loss weight for keypoint heatmaps.')
self.parser.add_argument('--off_weight', type=float, default=1,
help='loss weight for keypoint local offsets.')
self.parser.add_argument('--wh_weight', type=float, default=0.1,
help='loss weight for bounding box size.')
# multi_pose
self.parser.add_argument('--hp_weight', type=float, default=1,
help='loss weight for human pose offset.')
self.parser.add_argument('--hm_hp_weight', type=float, default=1,
help='loss weight for human keypoint heatmap.')
# ddd
self.parser.add_argument('--dep_weight', type=float, default=1,
help='loss weight for depth.')
self.parser.add_argument('--dim_weight', type=float, default=1,
help='loss weight for 3d bounding box size.')
self.parser.add_argument('--rot_weight', type=float, default=1,
help='loss weight for orientation.')
self.parser.add_argument('--peak_thresh', type=float, default=0.2)
# task
# ctdet
self.parser.add_argument('--norm_wh', action='store_true',
help='L1(\hat(y) / y, 1) or L1(\hat(y), y)')
self.parser.add_argument('--dense_wh', action='store_true',
help='apply weighted regression near center or '
'just apply regression on center point.')
self.parser.add_argument('--cat_spec_wh', action='store_true',
help='category specific bounding box size.')
self.parser.add_argument('--not_reg_offset', action='store_true',
help='not regress local offset.')
# exdet
self.parser.add_argument('--agnostic_ex', action='store_true',
help='use category agnostic extreme points.')
self.parser.add_argument('--scores_thresh', type=float, default=0.1,
help='threshold for extreme point heatmap.')
self.parser.add_argument('--center_thresh', type=float, default=0.1,
help='threshold for centermap.')
self.parser.add_argument('--aggr_weight', type=float, default=0.0,
help='edge aggregation weight.')
# multi_pose
self.parser.add_argument('--dense_hp', action='store_true',
help='apply weighted pose regression near center '
'or just apply regression on center point.')
self.parser.add_argument('--not_hm_hp', action='store_true',
help='not estimate human joint heatmap, '
'directly use the joint offset from center.')
self.parser.add_argument('--not_reg_hp_offset', action='store_true',
help='not regress local offset for '
'human joint heatmaps.')
self.parser.add_argument('--not_reg_bbox', action='store_true',
help='not regression bounding box size.')
# ground truth validation
self.parser.add_argument('--eval_oracle_hm', action='store_true',
help='use ground center heatmap.')
self.parser.add_argument('--eval_oracle_wh', action='store_true',
help='use ground truth bounding box size.')
self.parser.add_argument('--eval_oracle_offset', action='store_true',
help='use ground truth local heatmap offset.')
self.parser.add_argument('--eval_oracle_kps', action='store_true',
help='use ground truth human pose offset.')
self.parser.add_argument('--eval_oracle_hmhp', action='store_true',
help='use ground truth human joint heatmaps.')
self.parser.add_argument('--eval_oracle_hp_offset', action='store_true',
help='use ground truth human joint local offset.')
self.parser.add_argument('--eval_oracle_dep', action='store_true',
help='use ground truth depth.')
def parse(self, args=''):
if args == '':
opt = self.parser.parse_args()
else:
opt = self.parser.parse_args(args)
opt.gpus_str = opt.gpus
opt.gpus = [int(gpu) for gpu in opt.gpus.split(',')]
opt.gpus = [i for i in range(len(opt.gpus))] if opt.gpus[0] >=0 else [-1]
opt.lr_step = [int(i) for i in opt.lr_step.split(',')]
opt.test_scales = [float(i) for i in opt.test_scales.split(',')]
opt.fix_res = not opt.keep_res
print('Fix size testing.' if opt.fix_res else 'Keep resolution testing.')
opt.reg_offset = not opt.not_reg_offset
opt.reg_bbox = not opt.not_reg_bbox
opt.hm_hp = not opt.not_hm_hp
opt.reg_hp_offset = (not opt.not_reg_hp_offset) and opt.hm_hp
if opt.head_conv == -1: # init default head_conv
opt.head_conv = 256 if 'dla' in opt.arch else 64
opt.pad = 127 if 'hourglass' in opt.arch else 31
opt.num_stacks = 2 if opt.arch == 'hourglass' else 1
if opt.trainval:
opt.val_intervals = 100000000
if opt.debug > 0:
opt.num_workers = 0
opt.batch_size = 1
opt.gpus = [opt.gpus[0]]
opt.master_batch_size = -1
if opt.master_batch_size == -1:
opt.master_batch_size = opt.batch_size // len(opt.gpus)
rest_batch_size = (opt.batch_size - opt.master_batch_size)
opt.chunk_sizes = [opt.master_batch_size]
for i in range(len(opt.gpus) - 1):
slave_chunk_size = rest_batch_size // (len(opt.gpus) - 1)
if i < rest_batch_size % (len(opt.gpus) - 1):
slave_chunk_size += 1
opt.chunk_sizes.append(slave_chunk_size)
print('training chunk_sizes:', opt.chunk_sizes)
opt.root_dir = os.path.join(os.path.dirname(__file__), '..', '..')
opt.data_dir = os.path.join(opt.root_dir, 'data')
opt.exp_dir = os.path.join(opt.root_dir, 'exp', opt.task)
opt.save_dir = os.path.join(opt.exp_dir, opt.exp_id)
opt.debug_dir = os.path.join(opt.save_dir, 'debug')
print('The output will be saved to ', opt.save_dir)
if opt.resume and opt.load_model == '':
model_path = opt.save_dir[:-4] if opt.save_dir.endswith('TEST') \
else opt.save_dir
opt.load_model = os.path.join(model_path, 'model_last.pth')
return opt
def update_dataset_info_and_set_heads(self, opt, dataset):
input_h, input_w = dataset.default_resolution
opt.mean, opt.std = dataset.mean, dataset.std
opt.num_classes = dataset.num_classes
# input_h(w): opt.input_h overrides opt.input_res overrides dataset default
input_h = opt.input_res if opt.input_res > 0 else input_h
input_w = opt.input_res if opt.input_res > 0 else input_w
opt.input_h = opt.input_h if opt.input_h > 0 else input_h
opt.input_w = opt.input_w if opt.input_w > 0 else input_w
opt.output_h = opt.input_h // opt.down_ratio
opt.output_w = opt.input_w // opt.down_ratio
opt.input_res = max(opt.input_h, opt.input_w)
opt.output_res = max(opt.output_h, opt.output_w)
if opt.task == 'exdet':
# assert opt.dataset in ['coco']
num_hm = 1 if opt.agnostic_ex else opt.num_classes
opt.heads = {'hm_t': num_hm, 'hm_l': num_hm,
'hm_b': num_hm, 'hm_r': num_hm,
'hm_c': opt.num_classes}
if opt.reg_offset:
opt.heads.update({'reg_t': 2, 'reg_l': 2, 'reg_b': 2, 'reg_r': 2})
elif opt.task == 'ddd':
# assert opt.dataset in ['gta', 'kitti', 'viper']
opt.heads = {'hm': opt.num_classes, 'dep': 1, 'rot': 8, 'dim': 3}
if opt.reg_bbox:
opt.heads.update(
{'wh': 2})
if opt.reg_offset:
opt.heads.update({'reg': 2})
elif opt.task == 'ctdet':
# assert opt.dataset in ['pascal', 'coco']
opt.heads = {'hm': opt.num_classes,
'wh': 2 if not opt.cat_spec_wh else 2 * opt.num_classes}
if opt.reg_offset:
opt.heads.update({'reg': 2})
elif opt.task == 'multi_pose':
# assert opt.dataset in ['coco_hp']
opt.flip_idx = dataset.flip_idx
opt.heads = {'hm': opt.num_classes, 'wh': 2, 'hps': 34}
if opt.reg_offset:
opt.heads.update({'reg': 2})
if opt.hm_hp:
opt.heads.update({'hm_hp': 17})
if opt.reg_hp_offset:
opt.heads.update({'hp_offset': 2})
else:
assert 0, 'task not defined!'
print('heads', opt.heads)
return opt
def init(self, args=''):
default_dataset_info = {
'ctdet': {'default_resolution': [512, 512], 'num_classes': 80,
'mean': [0.408, 0.447, 0.470], 'std': [0.289, 0.274, 0.278],
'dataset': 'coco'},
'exdet': {'default_resolution': [512, 512], 'num_classes': 80,
'mean': [0.408, 0.447, 0.470], 'std': [0.289, 0.274, 0.278],
'dataset': 'coco'},
'multi_pose': {
'default_resolution': [512, 512], 'num_classes': 1,
'mean': [0.408, 0.447, 0.470], 'std': [0.289, 0.274, 0.278],
'dataset': 'coco_hp', 'num_joints': 17,
'flip_idx': [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
[11, 12], [13, 14], [15, 16]]},
'ddd': {'default_resolution': [384, 1280], 'num_classes': 3,
'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],
'dataset': 'kitti'},
}
class Struct:
def __init__(self, entries):
for k, v in entries.items():
self.__setattr__(k, v)
opt = self.parse(args)
dataset = Struct(default_dataset_info[opt.task])
opt.dataset = dataset.dataset
opt = self.update_dataset_info_and_set_heads(opt, dataset)
return opt
================================================
FILE: src/lib/trains/base_trainer.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import torch
from progress.bar import Bar
from models.data_parallel import DataParallel
from utils.utils import AverageMeter
class ModelWithLoss(torch.nn.Module):
def __init__(self, model, loss):
super(ModelWithLoss, self).__init__()
self.model = model
self.loss = loss
def forward(self, batch):
outputs = self.model(batch['input'])
loss, loss_stats = self.loss(outputs, batch)
return outputs[-1], loss, loss_stats
class BaseTrainer(object):
def __init__(
self, opt, model, optimizer=None):
self.opt = opt
self.optimizer = optimizer
self.loss_stats, self.loss = self._get_losses(opt)
self.model_with_loss = ModelWithLoss(model, self.loss)
def set_device(self, gpus, chunk_sizes, device):
if len(gpus) > 1:
self.model_with_loss = DataParallel(
self.model_with_loss, device_ids=gpus,
chunk_sizes=chunk_sizes).to(device)
else:
self.model_with_loss = self.model_with_loss.to(device)
for state in self.optimizer.state.values():
for k, v in state.items():
if isinstance(v, torch.Tensor):
state[k] = v.to(device=device, non_blocking=True)
def run_epoch(self, phase, epoch, data_loader):
model_with_loss = self.model_with_loss
if phase == 'train':
model_with_loss.train()
else:
if len(self.opt.gpus) > 1:
model_with_loss = self.model_with_loss.module
model_with_loss.eval()
torch.cuda.empty_cache()
opt = self.opt
results = {}
data_time, batch_time = AverageMeter(), AverageMeter()
avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
end = time.time()
for iter_id, batch in enumerate(data_loader):
if iter_id >= num_iters:
break
data_time.update(time.time() - end)
for k in batch:
if k != 'meta':
batch[k] = batch[k].to(device=opt.device, non_blocking=True)
output, loss, loss_stats = model_with_loss(batch)
loss = loss.mean()
if phase == 'train':
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
batch_time.update(time.time() - end)
end = time.time()
Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
epoch, iter_id, num_iters, phase=phase,
total=bar.elapsed_td, eta=bar.eta_td)
for l in avg_loss_stats:
avg_loss_stats[l].update(
loss_stats[l].mean().item(), batch['input'].size(0))
Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
if not opt.hide_data_time:
Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
'|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
if opt.print_iter > 0:
if iter_id % opt.print_iter == 0:
print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
else:
bar.next()
if opt.debug > 0:
self.debug(batch, output, iter_id)
if opt.test:
self.save_result(output, batch, results)
del output, loss, loss_stats
bar.finish()
ret = {k: v.avg for k, v in avg_loss_stats.items()}
ret['time'] = bar.elapsed_td.total_seconds() / 60.
return ret, results
def debug(self, batch, output, iter_id):
raise NotImplementedError
def save_result(self, output, batch, results):
raise NotImplementedError
def _get_losses(self, opt):
raise NotImplementedError
def val(self, epoch, data_loader):
return self.run_epoch('val', epoch, data_loader)
def train(self, epoch, data_loader):
return self.run_epoch('train', epoch, data_loader)
================================================
FILE: src/lib/trains/ctdet.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
from models.losses import FocalLoss
from models.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss
from models.decode import ctdet_decode
from models.utils import _sigmoid
from utils.debugger import Debugger
from utils.post_process import ctdet_post_process
from utils.oracle_utils import gen_oracle_map
from .base_trainer import BaseTrainer
class CtdetLoss(torch.nn.Module):
def __init__(self, opt):
super(CtdetLoss, self).__init__()
self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
RegLoss() if opt.reg_loss == 'sl1' else None
self.crit_wh = torch.nn.L1Loss(reduction='sum') if opt.dense_wh else \
NormRegL1Loss() if opt.norm_wh else \
RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg
self.opt = opt
def forward(self, outputs, batch):
opt = self.opt
hm_loss, wh_loss, off_loss = 0, 0, 0
for s in range(opt.num_stacks):
output = outputs[s]
if not opt.mse_loss:
output['hm'] = _sigmoid(output['hm'])
if opt.eval_oracle_hm:
output['hm'] = batch['hm']
if opt.eval_oracle_wh:
output['wh'] = torch.from_numpy(gen_oracle_map(
batch['wh'].detach().cpu().numpy(),
batch['ind'].detach().cpu().numpy(),
output['wh'].shape[3], output['wh'].shape[2])).to(opt.device)
if opt.eval_oracle_offset:
output['reg'] = torch.from_numpy(gen_oracle_map(
batch['reg'].detach().cpu().numpy(),
batch['ind'].detach().cpu().numpy(),
output['reg'].shape[3], output['reg'].shape[2])).to(opt.device)
hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks
if opt.wh_weight > 0:
if opt.dense_wh:
mask_weight = batch['dense_wh_mask'].sum() + 1e-4
wh_loss += (
self.crit_wh(output['wh'] * batch['dense_wh_mask'],
batch['dense_wh'] * batch['dense_wh_mask']) /
mask_weight) / opt.num_stacks
elif opt.cat_spec_wh:
wh_loss += self.crit_wh(
output['wh'], batch['cat_spec_mask'],
batch['ind'], batch['cat_spec_wh']) / opt.num_stacks
else:
wh_loss += self.crit_reg(
output['wh'], batch['reg_mask'],
batch['ind'], batch['wh']) / opt.num_stacks
if opt.reg_offset and opt.off_weight > 0:
off_loss += self.crit_reg(output['reg'], batch['reg_mask'],
batch['ind'], batch['reg']) / opt.num_stacks
loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + \
opt.off_weight * off_loss
loss_stats = {'loss': loss, 'hm_loss': hm_loss,
'wh_loss': wh_loss, 'off_loss': off_loss}
return loss, loss_stats
class CtdetTrainer(BaseTrainer):
def __init__(self, opt, model, optimizer=None):
super(CtdetTrainer, self).__init__(opt, model, optimizer=optimizer)
def _get_losses(self, opt):
loss_states = ['loss', 'hm_loss', 'wh_loss', 'off_loss']
loss = CtdetLoss(opt)
return loss_states, loss
def debug(self, batch, output, iter_id):
opt = self.opt
reg = output['reg'] if opt.reg_offset else None
dets = ctdet_decode(
output['hm'], output['wh'], reg=reg,
cat_spec_wh=opt.cat_spec_wh, K=opt.K)
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets[:, :, :4] *= opt.down_ratio
dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
dets_gt[:, :, :4] *= opt.down_ratio
for i in range(1):
debugger = Debugger(
dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme)
img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
img = np.clip(((
img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm')
debugger.add_blend_img(img, gt, 'gt_hm')
debugger.add_img(img, img_id='out_pred')
for k in range(len(dets[i])):
if dets[i, k, 4] > opt.center_thresh:
debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
dets[i, k, 4], img_id='out_pred')
debugger.add_img(img, img_id='out_gt')
for k in range(len(dets_gt[i])):
if dets_gt[i, k, 4] > opt.center_thresh:
debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
dets_gt[i, k, 4], img_id='out_gt')
if opt.debug == 4:
debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
else:
debugger.show_all_imgs(pause=True)
def save_result(self, output, batch, results):
reg = output['reg'] if self.opt.reg_offset else None
dets = ctdet_decode(
output['hm'], output['wh'], reg=reg,
cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets_out = ctdet_post_process(
dets.copy(), batch['meta']['c'].cpu().numpy(),
batch['meta']['s'].cpu().numpy(),
output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1])
results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
================================================
FILE: src/lib/trains/ddd.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
from models.losses import FocalLoss, L1Loss, BinRotLoss
from models.decode import ddd_decode
from models.utils import _sigmoid
from utils.debugger import Debugger
from utils.post_process import ddd_post_process
from utils.oracle_utils import gen_oracle_map
from .base_trainer import BaseTrainer
class DddLoss(torch.nn.Module):
def __init__(self, opt):
super(DddLoss, self).__init__()
self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
self.crit_reg = L1Loss()
self.crit_rot = BinRotLoss()
self.opt = opt
def forward(self, outputs, batch):
opt = self.opt
hm_loss, dep_loss, rot_loss, dim_loss = 0, 0, 0, 0
wh_loss, off_loss = 0, 0
for s in range(opt.num_stacks):
output = outputs[s]
output['hm'] = _sigmoid(output['hm'])
output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
if opt.eval_oracle_dep:
output['dep'] = torch.from_numpy(gen_oracle_map(
batch['dep'].detach().cpu().numpy(),
batch['ind'].detach().cpu().numpy(),
opt.output_w, opt.output_h)).to(opt.device)
hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks
if opt.dep_weight > 0:
dep_loss += self.crit_reg(output['dep'], batch['reg_mask'],
batch['ind'], batch['dep']) / opt.num_stacks
if opt.dim_weight > 0:
dim_loss += self.crit_reg(output['dim'], batch['reg_mask'],
batch['ind'], batch['dim']) / opt.num_stacks
if opt.rot_weight > 0:
rot_loss += self.crit_rot(output['rot'], batch['rot_mask'],
batch['ind'], batch['rotbin'],
batch['rotres']) / opt.num_stacks
if opt.reg_bbox and opt.wh_weight > 0:
wh_loss += self.crit_reg(output['wh'], batch['rot_mask'],
batch['ind'], batch['wh']) / opt.num_stacks
if opt.reg_offset and opt.off_weight > 0:
off_loss += self.crit_reg(output['reg'], batch['rot_mask'],
batch['ind'], batch['reg']) / opt.num_stacks
loss = opt.hm_weight * hm_loss + opt.dep_weight * dep_loss + \
opt.dim_weight * dim_loss + opt.rot_weight * rot_loss + \
opt.wh_weight * wh_loss + opt.off_weight * off_loss
loss_stats = {'loss': loss, 'hm_loss': hm_loss, 'dep_loss': dep_loss,
'dim_loss': dim_loss, 'rot_loss': rot_loss,
'wh_loss': wh_loss, 'off_loss': off_loss}
return loss, loss_stats
class DddTrainer(BaseTrainer):
def __init__(self, opt, model, optimizer=None):
super(DddTrainer, self).__init__(opt, model, optimizer=optimizer)
def _get_losses(self, opt):
loss_states = ['loss', 'hm_loss', 'dep_loss', 'dim_loss', 'rot_loss',
'wh_loss', 'off_loss']
loss = DddLoss(opt)
return loss_states, loss
def debug(self, batch, output, iter_id):
opt = self.opt
wh = output['wh'] if opt.reg_bbox else None
reg = output['reg'] if opt.reg_offset else None
dets = ddd_decode(output['hm'], output['rot'], output['dep'],
output['dim'], wh=wh, reg=reg, K=opt.K)
# x, y, score, r1-r8, depth, dim1-dim3, cls
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
calib = batch['meta']['calib'].detach().numpy()
# x, y, score, rot, depth, dim1, dim2, dim3
# if opt.dataset == 'gta':
# dets[:, 12:15] /= 3
dets_pred = ddd_post_process(
dets.copy(), batch['meta']['c'].detach().numpy(),
batch['meta']['s'].detach().numpy(), calib, opt)
dets_gt = ddd_post_process(
batch['meta']['gt_det'].detach().numpy().copy(),
batch['meta']['c'].detach().numpy(),
batch['meta']['s'].detach().numpy(), calib, opt)
#for i in range(input.size(0)):
for i in range(1):
debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug==3),
theme=opt.debugger_theme)
img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8)
pred = debugger.gen_colormap(
output['hm'][i].detach().cpu().numpy())
gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'hm_pred')
debugger.add_blend_img(img, gt, 'hm_gt')
# decode
debugger.add_ct_detection(
img, dets[i], show_box=opt.reg_bbox, center_thresh=opt.center_thresh,
img_id='det_pred')
debugger.add_ct_detection(
img, batch['meta']['gt_det'][i].cpu().numpy().copy(),
show_box=opt.reg_bbox, img_id='det_gt')
debugger.add_3d_detection(
batch['meta']['image_path'][i], dets_pred[i], calib[i],
center_thresh=opt.center_thresh, img_id='add_pred')
debugger.add_3d_detection(
batch['meta']['image_path'][i], dets_gt[i], calib[i],
center_thresh=opt.center_thresh, img_id='add_gt')
# debugger.add_bird_view(
# dets_pred[i], center_thresh=opt.center_thresh, img_id='bird_pred')
# debugger.add_bird_view(dets_gt[i], img_id='bird_gt')
debugger.add_bird_views(
dets_pred[i], dets_gt[i],
center_thresh=opt.center_thresh, img_id='bird_pred_gt')
# debugger.add_blend_img(img, pred, 'out', white=True)
debugger.compose_vis_add(
batch['meta']['image_path'][i], dets_pred[i], calib[i],
opt.center_thresh, pred, 'bird_pred_gt', img_id='out')
# debugger.add_img(img, img_id='out')
if opt.debug ==4:
debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
else:
debugger.show_all_imgs(pause=True)
def save_result(self, output, batch, results):
opt = self.opt
wh = output['wh'] if opt.reg_bbox else None
reg = output['reg'] if opt.reg_offset else None
dets = ddd_decode(output['hm'], output['rot'], output['dep'],
output['dim'], wh=wh, reg=reg, K=opt.K)
# x, y, score, r1-r8, depth, dim1-dim3, cls
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
calib = batch['meta']['calib'].detach().numpy()
# x, y, score, rot, depth, dim1, dim2, dim3
dets_pred = ddd_post_process(
dets.copy(), batch['meta']['c'].detach().numpy(),
batch['meta']['s'].detach().numpy(), calib, opt)
img_id = batch['meta']['img_id'].detach().numpy()[0]
results[img_id] = dets_pred[0]
for j in range(1, opt.num_classes + 1):
keep_inds = (results[img_id][j][:, -1] > opt.center_thresh)
results[img_id][j] = results[img_id][j][keep_inds]
================================================
FILE: src/lib/trains/exdet.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
import cv2
import sys
import time
from utils.debugger import Debugger
from models.data_parallel import DataParallel
from models.losses import FocalLoss, RegL1Loss
from models.decode import agnex_ct_decode, exct_decode
from models.utils import _sigmoid
from .base_trainer import BaseTrainer
class ExdetLoss(torch.nn.Module):
def __init__(self, opt):
super(ExdetLoss, self).__init__()
self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
self.crit_reg = RegL1Loss()
self.opt = opt
self.parts = ['t', 'l', 'b', 'r', 'c']
def forward(self, outputs, batch):
opt = self.opt
hm_loss, reg_loss = 0, 0
for s in range(opt.num_stacks):
output = outputs[s]
for p in self.parts:
tag = 'hm_{}'.format(p)
output[tag] = _sigmoid(output[tag])
hm_loss += self.crit(output[tag], batch[tag]) / opt.num_stacks
if p != 'c' and opt.reg_offset and opt.off_weight > 0:
reg_loss += self.crit_reg(output['reg_{}'.format(p)],
batch['reg_mask'],
batch['ind_{}'.format(p)],
batch['reg_{}'.format(p)]) / opt.num_stacks
loss = opt.hm_weight * hm_loss + opt.off_weight * reg_loss
loss_stats = {'loss': loss, 'off_loss': reg_loss, 'hm_loss': hm_loss}
return loss, loss_stats
class ExdetTrainer(BaseTrainer):
def __init__(self, opt, model, optimizer=None):
super(ExdetTrainer, self).__init__(opt, model, optimizer=optimizer)
self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode
def _get_losses(self, opt):
loss_states = ['loss', 'hm_loss', 'off_loss']
loss = ExdetLoss(opt)
return loss_states, loss
def debug(self, batch, output, iter_id):
opt = self.opt
detections = self.decode(output['hm_t'], output['hm_l'],
output['hm_b'], output['hm_r'],
output['hm_c']).detach().cpu().numpy()
detections[:, :, :4] *= opt.input_res / opt.output_res
for i in range(1):
debugger = Debugger(
dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme)
pred_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8)
for p in self.parts:
tag = 'hm_{}'.format(p)
pred = debugger.gen_colormap(output[tag][i].detach().cpu().numpy())
gt = debugger.gen_colormap(batch[tag][i].detach().cpu().numpy())
if p != 'c':
pred_hm = np.maximum(pred_hm, pred)
gt_hm = np.maximum(gt_hm, gt)
if p == 'c' or opt.debug > 2:
debugger.add_blend_img(img, pred, 'pred_{}'.format(p))
debugger.add_blend_img(img, gt, 'gt_{}'.format(p))
debugger.add_blend_img(img, pred_hm, 'pred')
debugger.add_blend_img(img, gt_hm, 'gt')
debugger.add_img(img, img_id='out')
for k in range(len(detections[i])):
if detections[i, k, 4] > 0.1:
debugger.add_coco_bbox(detections[i, k, :4], detections[i, k, -1],
detections[i, k, 4], img_id='out')
if opt.debug == 4:
debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
else:
debugger.show_all_imgs(pause=True)
================================================
FILE: src/lib/trains/multi_pose.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
from models.losses import FocalLoss, RegL1Loss, RegLoss, RegWeightedL1Loss
from models.decode import multi_pose_decode
from models.utils import _sigmoid, flip_tensor, flip_lr_off, flip_lr
from utils.debugger import Debugger
from utils.post_process import multi_pose_post_process
from utils.oracle_utils import gen_oracle_map
from .base_trainer import BaseTrainer
class MultiPoseLoss(torch.nn.Module):
def __init__(self, opt):
super(MultiPoseLoss, self).__init__()
self.crit = FocalLoss()
self.crit_hm_hp = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
self.crit_kp = RegWeightedL1Loss() if not opt.dense_hp else \
torch.nn.L1Loss(reduction='sum')
self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
RegLoss() if opt.reg_loss == 'sl1' else None
self.opt = opt
def forward(self, outputs, batch):
opt = self.opt
hm_loss, wh_loss, off_loss = 0, 0, 0
hp_loss, off_loss, hm_hp_loss, hp_offset_loss = 0, 0, 0, 0
for s in range(opt.num_stacks):
output = outputs[s]
output['hm'] = _sigmoid(output['hm'])
if opt.hm_hp and not opt.mse_loss:
output['hm_hp'] = _sigmoid(output['hm_hp'])
if opt.eval_oracle_hmhp:
output['hm_hp'] = batch['hm_hp']
if opt.eval_oracle_hm:
output['hm'] = batch['hm']
if opt.eval_oracle_kps:
if opt.dense_hp:
output['hps'] = batch['dense_hps']
else:
output['hps'] = torch.from_numpy(gen_oracle_map(
batch['hps'].detach().cpu().numpy(),
batch['ind'].detach().cpu().numpy(),
opt.output_res, opt.output_res)).to(opt.device)
if opt.eval_oracle_hp_offset:
output['hp_offset'] = torch.from_numpy(gen_oracle_map(
batch['hp_offset'].detach().cpu().numpy(),
batch['hp_ind'].detach().cpu().numpy(),
opt.output_res, opt.output_res)).to(opt.device)
hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks
if opt.dense_hp:
mask_weight = batch['dense_hps_mask'].sum() + 1e-4
hp_loss += (self.crit_kp(output['hps'] * batch['dense_hps_mask'],
batch['dense_hps'] * batch['dense_hps_mask']) /
mask_weight) / opt.num_stacks
else:
hp_loss += self.crit_kp(output['hps'], batch['hps_mask'],
batch['ind'], batch['hps']) / opt.num_stacks
if opt.wh_weight > 0:
wh_loss += self.crit_reg(output['wh'], batch['reg_mask'],
batch['ind'], batch['wh']) / opt.num_stacks
if opt.reg_offset and opt.off_weight > 0:
off_loss += self.crit_reg(output['reg'], batch['reg_mask'],
batch['ind'], batch['reg']) / opt.num_stacks
if opt.reg_hp_offset and opt.off_weight > 0:
hp_offset_loss += self.crit_reg(
output['hp_offset'], batch['hp_mask'],
batch['hp_ind'], batch['hp_offset']) / opt.num_stacks
if opt.hm_hp and opt.hm_hp_weight > 0:
hm_hp_loss += self.crit_hm_hp(
output['hm_hp'], batch['hm_hp']) / opt.num_stacks
loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + \
opt.off_weight * off_loss + opt.hp_weight * hp_loss + \
opt.hm_hp_weight * hm_hp_loss + opt.off_weight * hp_offset_loss
loss_stats = {'loss': loss, 'hm_loss': hm_loss, 'hp_loss': hp_loss,
'hm_hp_loss': hm_hp_loss, 'hp_offset_loss': hp_offset_loss,
'wh_loss': wh_loss, 'off_loss': off_loss}
return loss, loss_stats
class MultiPoseTrainer(BaseTrainer):
def __init__(self, opt, model, optimizer=None):
super(MultiPoseTrainer, self).__init__(opt, model, optimizer=optimizer)
def _get_losses(self, opt):
loss_states = ['loss', 'hm_loss', 'hp_loss', 'hm_hp_loss',
'hp_offset_loss', 'wh_loss', 'off_loss']
loss = MultiPoseLoss(opt)
return loss_states, loss
def debug(self, batch, output, iter_id):
opt = self.opt
reg = output['reg'] if opt.reg_offset else None
hm_hp = output['hm_hp'] if opt.hm_hp else None
hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
dets = multi_pose_decode(
output['hm'], output['wh'], output['hps'],
reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K)
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets[:, :, :4] *= opt.input_res / opt.output_res
dets[:, :, 5:39] *= opt.input_res / opt.output_res
dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
dets_gt[:, :, :4] *= opt.input_res / opt.output_res
dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res
for i in range(1):
debugger = Debugger(
dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme)
img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
img = np.clip(((
img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm')
debugger.add_blend_img(img, gt, 'gt_hm')
debugger.add_img(img, img_id='out_pred')
for k in range(len(dets[i])):
if dets[i, k, 4] > opt.center_thresh:
debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
dets[i, k, 4], img_id='out_pred')
debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')
debugger.add_img(img, img_id='out_gt')
for k in range(len(dets_gt[i])):
if dets_gt[i, k, 4] > opt.center_thresh:
debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
dets_gt[i, k, 4], img_id='out_gt')
debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')
if opt.hm_hp:
pred = debugger.gen_colormap_hp(output['hm_hp'][i].detach().cpu().numpy())
gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hmhp')
debugger.add_blend_img(img, gt, 'gt_hmhp')
if opt.debug == 4:
debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
else:
debugger.show_all_imgs(pause=True)
def save_result(self, output, batch, results):
reg = output['reg'] if self.opt.reg_offset else None
hm_hp = output['hm_hp'] if self.opt.hm_hp else None
hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None
dets = multi_pose_decode(
output['hm'], output['wh'], output['hps'],
reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K)
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets_out = multi_pose_post_process(
dets.copy(), batch['meta']['c'].cpu().numpy(),
batch['meta']['s'].cpu().numpy(),
output['hm'].shape[2], output['hm'].shape[3])
results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
================================================
FILE: src/lib/trains/train_factory.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .ctdet import CtdetTrainer
from .ddd import DddTrainer
from .exdet import ExdetTrainer
from .multi_pose import MultiPoseTrainer
train_factory = {
'exdet': ExdetTrainer,
'ddd': DddTrainer,
'ctdet': CtdetTrainer,
'multi_pose': MultiPoseTrainer,
}
================================================
FILE: src/lib/utils/__init__.py
================================================
================================================
FILE: src/lib/utils/ddd_utils.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
def compute_box_3d(dim, location, rotation_y):
# dim: 3
# location: 3
# rotation_y: 1
# return: 8 x 3
c, s = np.cos(rotation_y), np.sin(rotation_y)
R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
l, w, h = dim[2], dim[1], dim[0]
x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2]
y_corners = [0,0,0,0,-h,-h,-h,-h]
z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2]
corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32)
corners_3d = np.dot(R, corners)
corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1)
return corners_3d.transpose(1, 0)
def project_to_image(pts_3d, P):
# pts_3d: n x 3
# P: 3 x 4
# return: n x 2
pts_3d_homo = np.concatenate(
[pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1)
pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
pts_2d = pts_2d[:, :2] / pts_2d[:, 2:]
# import pdb; pdb.set_trace()
return pts_2d
def compute_orientation_3d(dim, location, rotation_y):
# dim: 3
# location: 3
# rotation_y: 1
# return: 2 x 3
c, s = np.cos(rotation_y), np.sin(rotation_y)
R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32)
orientation_3d = np.dot(R, orientation_3d)
orientation_3d = orientation_3d + \
np.array(location, dtype=np.float32).reshape(3, 1)
return orientation_3d.transpose(1, 0)
def draw_box_3d(image, corners, c=(0, 0, 255)):
face_idx = [[0,1,5,4],
[1,2,6, 5],
[2,3,7,6],
[3,0,4,7]]
for ind_f in range(3, -1, -1):
f = face_idx[ind_f]
for j in range(4):
cv2.line(image, (corners[f[j], 0], corners[f[j], 1]),
(corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), c, 2, lineType=cv2.LINE_AA)
if ind_f == 0:
cv2.line(image, (corners[f[0], 0], corners[f[0], 1]),
(corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA)
cv2.line(image, (corners[f[1], 0], corners[f[1], 1]),
(corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA)
return image
def unproject_2d_to_3d(pt_2d, depth, P):
# pts_2d: 2
# depth: 1
# P: 3 x 4
# return: 3
z = depth - P[2, 3]
x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0]
y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1]
pt_3d = np.array([x, y, z], dtype=np.float32)
return pt_3d
def alpha2rot_y(alpha, x, cx, fx):
"""
Get rotation_y by alpha + theta - 180
alpha : Observation angle of object, ranging [-pi..pi]
x : Object center x to the camera center (x-W/2), in pixels
rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi]
"""
rot_y = alpha + np.arctan2(x - cx, fx)
if rot_y > np.pi:
rot_y -= 2 * np.pi
if rot_y < -np.pi:
rot_y += 2 * np.pi
return rot_y
def rot_y2alpha(rot_y, x, cx, fx):
"""
Get rotation_y by alpha + theta - 180
alpha : Observation angle of object, ranging [-pi..pi]
x : Object center x to the camera center (x-W/2), in pixels
rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi]
"""
alpha = rot_y - np.arctan2(x - cx, fx)
if alpha > np.pi:
alpha -= 2 * np.pi
if alpha < -np.pi:
alpha += 2 * np.pi
return alpha
def ddd2locrot(center, alpha, dim, depth, calib):
# single image
locations = unproject_2d_to_3d(center, depth, calib)
locations[1] += dim[0] / 2
rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0])
return locations, rotation_y
def project_3d_bbox(location, dim, rotation_y, calib):
box_3d = compute_box_3d(dim, location, rotation_y)
box_2d = project_to_image(box_3d, calib)
return box_2d
if __name__ == '__main__':
calib = np.array(
[[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01],
[0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01],
[0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]],
dtype=np.float32)
alpha = -0.20
tl = np.array([712.40, 143.00], dtype=np.float32)
br = np.array([810.73, 307.92], dtype=np.float32)
ct = (tl + br) / 2
rotation_y = 0.01
print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0]))
print('rotation_y', rotation_y)
================================================
FILE: src/lib/utils/debugger.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
from .ddd_utils import compute_box_3d, project_to_image, draw_box_3d
class Debugger(object):
def __init__(self, ipynb=False, theme='black',
num_classes=-1, dataset=None, down_ratio=4):
self.ipynb = ipynb
if not self.ipynb:
import matplotlib.pyplot as plt
self.plt = plt
self.imgs = {}
self.theme = theme
colors = [(color_list[_]).astype(np.uint8) \
for _ in range(len(color_list))]
self.colors = np.array(colors, dtype=np.uint8).reshape(len(colors), 1, 1, 3)
if self.theme == 'white':
self.colors = self.colors.reshape(-1)[::-1].reshape(len(colors), 1, 1, 3)
self.colors = np.clip(self.colors, 0., 0.6 * 255).astype(np.uint8)
self.dim_scale = 1
if dataset == 'coco_hp':
self.names = ['p']
self.num_class = 1
self.num_joints = 17
self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
[3, 5], [4, 6], [5, 6],
[5, 7], [7, 9], [6, 8], [8, 10],
[5, 11], [6, 12], [11, 12],
[11, 13], [13, 15], [12, 14], [14, 16]]
self.ec = [(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 255),
(255, 0, 0), (255, 0, 0), (0, 0, 255), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 255),
(255, 0, 0), (255, 0, 0), (0, 0, 255), (0, 0, 255)]
self.colors_hp = [(255, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255)]
elif num_classes == 80 or dataset == 'coco':
self.names = coco_class_name
elif num_classes == 20 or dataset == 'pascal':
self.names = pascal_class_name
elif dataset == 'gta':
self.names = gta_class_name
self.focal_length = 935.3074360871937
self.W = 1920
self.H = 1080
self.dim_scale = 3
elif dataset == 'viper':
self.names = gta_class_name
self.focal_length = 1158
self.W = 1920
self.H = 1080
self.dim_scale = 3
elif num_classes == 3 or dataset == 'kitti':
self.names = kitti_class_name
self.focal_length = 721.5377
self.W = 1242
self.H = 375
num_classes = len(self.names)
self.down_ratio=down_ratio
# for bird view
self.world_size = 64
self.out_size = 384
def add_img(self, img, img_id='default', revert_color=False):
if revert_color:
img = 255 - img
self.imgs[img_id] = img.copy()
def add_mask(self, mask, bg, imgId = 'default', trans = 0.8):
self.imgs[imgId] = (mask.reshape(
mask.shape[0], mask.shape[1], 1) * 255 * trans + \
bg * (1 - trans)).astype(np.uint8)
def show_img(self, pause = False, imgId = 'default'):
cv2.imshow('{}'.format(imgId), self.imgs[imgId])
if pause:
cv2.waitKey()
def add_blend_img(self, back, fore, img_id='blend', trans=0.7):
if self.theme == 'white':
fore = 255 - fore
if fore.shape[0] != back.shape[0] or fore.shape[0] != back.shape[1]:
fore = cv2.resize(fore, (back.shape[1], back.shape[0]))
if len(fore.shape) == 2:
fore = fore.reshape(fore.shape[0], fore.shape[1], 1)
self.imgs[img_id] = (back * (1. - trans) + fore * trans)
self.imgs[img_id][self.imgs[img_id] > 255] = 255
self.imgs[img_id][self.imgs[img_id] < 0] = 0
self.imgs[img_id] = self.imgs[img_id].astype(np.uint8).copy()
'''
# slow version
def gen_colormap(self, img, output_res=None):
# num_classes = len(self.colors)
img[img < 0] = 0
h, w = img.shape[1], img.shape[2]
if output_res is None:
output_res = (h * self.down_ratio, w * self.down_ratio)
color_map = np.zeros((output_res[0], output_res[1], 3), dtype=np.uint8)
for i in range(img.shape[0]):
resized = cv2.resize(img[i], (output_res[1], output_res[0]))
resized = resized.reshape(output_res[0], output_res[1], 1)
cl = self.colors[i] if not (self.theme == 'white') \
else 255 - self.colors[i]
color_map = np.maximum(color_map, (resized * cl).astype(np.uint8))
return color_map
'''
def gen_colormap(self, img, output_res=None):
img = img.copy()
c, h, w = img.shape[0], img.shape[1], img.shape[2]
if output_res is None:
output_res = (h * self.down_ratio, w * self.down_ratio)
img = img.transpose(1, 2, 0).reshape(h, w, c, 1).astype(np.float32)
colors = np.array(
self.colors, dtype=np.float32).reshape(-1, 3)[:c].reshape(1, 1, c, 3)
if self.theme == 'white':
colors = 255 - colors
color_map = (img * colors).max(axis=2).astype(np.uint8)
color_map = cv2.resize(color_map, (output_res[0], output_res[1]))
return color_map
'''
# slow
def gen_colormap_hp(self, img, output_res=None):
# num_classes = len(self.colors)
# img[img < 0] = 0
h, w = img.shape[1], img.shape[2]
if output_res is None:
output_res = (h * self.down_ratio, w * self.down_ratio)
color_map = np.zeros((output_res[0], output_res[1], 3), dtype=np.uint8)
for i in range(img.shape[0]):
resized = cv2.resize(img[i], (output_res[1], output_res[0]))
resized = resized.reshape(output_res[0], output_res[1], 1)
cl = self.colors_hp[i] if not (self.theme == 'white') else \
(255 - np.array(self.colors_hp[i]))
color_map = np.maximum(color_map, (resized * cl).astype(np.uint8))
return color_map
'''
def gen_colormap_hp(self, img, output_res=None):
c, h, w = img.shape[0], img.shape[1], img.shape[2]
if output_res is None:
output_res = (h * self.down_ratio, w * self.down_ratio)
img = img.transpose(1, 2, 0).reshape(h, w, c, 1).astype(np.float32)
colors = np.array(
self.colors_hp, dtype=np.float32).reshape(-1, 3)[:c].reshape(1, 1, c, 3)
if self.theme == 'white':
colors = 255 - colors
color_map = (img * colors).max(axis=2).astype(np.uint8)
color_map = cv2.resize(color_map, (output_res[0], output_res[1]))
return color_map
def add_rect(self, rect1, rect2, c, conf=1, img_id='default'):
cv2.rectangle(
self.imgs[img_id], (rect1[0], rect1[1]), (rect2[0], rect2[1]), c, 2)
if conf < 1:
cv2.circle(self.imgs[img_id], (rect1[0], rect1[1]), int(10 * conf), c, 1)
cv2.circle(self.imgs[img_id], (rect2[0], rect2[1]), int(10 * conf), c, 1)
cv2.circle(self.imgs[img_id], (rect1[0], rect2[1]), int(10 * conf), c, 1)
cv2.circle(self.imgs[img_id], (rect2[0], rect1[1]), int(10 * conf), c, 1)
def add_coco_bbox(self, bbox, cat, conf=1, show_txt=True, img_id='default'):
bbox = np.array(bbox, dtype=np.int32)
# cat = (int(cat) + 1) % 80
cat = int(cat)
# print('cat', cat, self.names[cat])
c = self.colors[cat][0][0].tolist()
if self.theme == 'white':
c = (255 - np.array(c)).tolist()
txt = '{}{:.1f}'.format(self.names[cat], conf)
font = cv2.FONT_HERSHEY_SIMPLEX
cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
cv2.rectangle(
self.imgs[img_id], (bbox[0], bbox[1]), (bbox[2], bbox[3]), c, 2)
if show_txt:
cv2.rectangle(self.imgs[img_id],
(bbox[0], bbox[1] - cat_size[1] - 2),
(bbox[0] + cat_size[0], bbox[1] - 2), c, -1)
cv2.putText(self.imgs[img_id], txt, (bbox[0], bbox[1] - 2),
font, 0.5, (0, 0, 0), thickness=1, lineType=cv2.LINE_AA)
def add_coco_hp(self, points, img_id='default'):
points = np.array(points, dtype=np.int32).reshape(self.num_joints, 2)
for j in range(self.num_joints):
cv2.circle(self.imgs[img_id],
(points[j, 0], points[j, 1]), 3, self.colors_hp[j], -1)
for j, e in enumerate(self.edges):
if points[e].min() > 0:
cv2.line(self.imgs[img_id], (points[e[0], 0], points[e[0], 1]),
(points[e[1], 0], points[e[1], 1]), self.ec[j], 2,
lineType=cv2.LINE_AA)
def add_points(self, points, img_id='default'):
num_classes = len(points)
# assert num_classes == len(self.colors)
for i in range(num_classes):
for j in range(len(points[i])):
c = self.colors[i, 0, 0]
cv2.circle(self.imgs[img_id], (points[i][j][0] * self.down_ratio,
points[i][j][1] * self.down_ratio),
5, (255, 255, 255), -1)
cv2.circle(self.imgs[img_id], (points[i][j][0] * self.down_ratio,
points[i][j][1] * self.down_ratio),
3, (int(c[0]), int(c[1]), int(c[2])), -1)
def show_all_imgs(self, pause=False, time=0):
if not self.ipynb:
for i, v in self.imgs.items():
cv2.imshow('{}'.format(i), v)
if cv2.waitKey(0 if pause else 1) == 27:
import sys
sys.exit(0)
else:
self.ax = None
nImgs = len(self.imgs)
fig=self.plt.figure(figsize=(nImgs * 10,10))
nCols = nImgs
nRows = nImgs // nCols
for i, (k, v) in enumerate(self.imgs.items()):
fig.add_subplot(1, nImgs, i + 1)
if len(v.shape) == 3:
self.plt.imshow(cv2.cvtColor(v, cv2.COLOR_BGR2RGB))
else:
self.plt.imshow(v)
self.plt.show()
def save_img(self, imgId='default', path='./cache/debug/'):
cv2.imwrite(path + '{}.png'.format(imgId), self.imgs[imgId])
def save_all_imgs(self, path='./cache/debug/', prefix='', genID=False):
if genID:
try:
idx = int(np.loadtxt(path + '/id.txt'))
except:
idx = 0
prefix=idx
np.savetxt(path + '/id.txt', np.ones(1) * (idx + 1), fmt='%d')
for i, v in self.imgs.items():
cv2.imwrite(path + '/{}{}.png'.format(prefix, i), v)
def remove_side(self, img_id, img):
if not (img_id in self.imgs):
return
ws = img.sum(axis=2).sum(axis=0)
l = 0
while ws[l] == 0 and l < len(ws):
l+= 1
r = ws.shape[0] - 1
while ws[r] == 0 and r > 0:
r -= 1
hs = img.sum(axis=2).sum(axis=1)
t = 0
while hs[t] == 0 and t < len(hs):
t += 1
b = hs.shape[0] - 1
while hs[b] == 0 and b > 0:
b -= 1
self.imgs[img_id] = self.imgs[img_id][t:b+1, l:r+1].copy()
def project_3d_to_bird(self, pt):
pt[0] += self.world_size / 2
pt[1] = self.world_size - pt[1]
pt = pt * self.out_size / self.world_size
return pt.astype(np.int32)
def add_ct_detection(
self, img, dets, show_box=False, show_txt=True,
center_thresh=0.5, img_id='det'):
# dets: max_preds x 5
self.imgs[img_id] = img.copy()
if type(dets) == type({}):
for cat in dets:
for i in range(len(dets[cat])):
if dets[cat][i, 2] > center_thresh:
cl = (self.colors[cat, 0, 0]).tolist()
ct = dets[cat][i, :2].astype(np.int32)
if show_box:
w, h = dets[cat][i, -2], dets[cat][i, -1]
x, y = dets[cat][i, 0], dets[cat][i, 1]
bbox = np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2],
dtype=np.float32)
self.add_coco_bbox(
bbox, cat - 1, dets[cat][i, 2],
show_txt=show_txt, img_id=img_id)
else:
for i in range(len(dets)):
if dets[i, 2] > center_thresh:
# print('dets', dets[i])
cat = int(dets[i, -1])
cl = (self.colors[cat, 0, 0] if self.theme == 'black' else \
255 - self.colors[cat, 0, 0]).tolist()
ct = dets[i, :2].astype(np.int32) * self.down_ratio
cv2.circle(self.imgs[img_id], (ct[0], ct[1]), 3, cl, -1)
if show_box:
w, h = dets[i, -3] * self.down_ratio, dets[i, -2] * self.down_ratio
x, y = dets[i, 0] * self.down_ratio, dets[i, 1] * self.down_ratio
bbox = np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2],
dtype=np.float32)
self.add_coco_bbox(bbox, dets[i, -1], dets[i, 2], img_id=img_id)
def add_3d_detection(
self, image_or_path, dets, calib, show_txt=False,
center_thresh=0.5, img_id='det'):
if isinstance(image_or_path, np.ndarray):
self.imgs[img_id] = image_or_path
else:
self.imgs[img_id] = cv2.imread(image_or_path)
for cat in dets:
for i in range(len(dets[cat])):
cl = (self.colors[cat - 1, 0, 0]).tolist()
if dets[cat][i, -1] > center_thresh:
dim = dets[cat][i, 5:8]
loc = dets[cat][i, 8:11]
rot_y = dets[cat][i, 11]
# loc[1] = loc[1] - dim[0] / 2 + dim[0] / 2 / self.dim_scale
# dim = dim / self.dim_scale
if loc[2] > 1:
box_3d = compute_box_3d(dim, loc, rot_y)
box_2d = project_to_image(box_3d, calib)
self.imgs[img_id] = draw_box_3d(self.imgs[img_id], box_2d, cl)
def compose_vis_add(
self, img_path, dets, calib,
center_thresh, pred, bev, img_id='out'):
self.imgs[img_id] = cv2.imread(img_path)
# h, w = self.imgs[img_id].shape[:2]
# pred = cv2.resize(pred, (h, w))
h, w = pred.shape[:2]
hs, ws = self.imgs[img_id].shape[0] / h, self.imgs[img_id].shape[1] / w
self.imgs[img_id] = cv2.resize(self.imgs[img_id], (w, h))
self.add_blend_img(self.imgs[img_id], pred, img_id)
for cat in dets:
for i in range(len(dets[cat])):
cl = (self.colors[cat - 1, 0, 0]).tolist()
if dets[cat][i, -1] > center_thresh:
dim = dets[cat][i, 5:8]
loc = dets[cat][i, 8:11]
rot_y = dets[cat][i, 11]
# loc[1] = loc[1] - dim[0] / 2 + dim[0] / 2 / self.dim_scale
# dim = dim / self.dim_scale
if loc[2] > 1:
box_3d = compute_box_3d(dim, loc, rot_y)
box_2d = project_to_image(box_3d, calib)
box_2d[:, 0] /= hs
box_2d[:, 1] /= ws
self.imgs[img_id] = draw_box_3d(self.imgs[img_id], box_2d, cl)
self.imgs[img_id] = np.concatenate(
[self.imgs[img_id], self.imgs[bev]], axis=1)
def add_2d_detection(
self, img, dets, show_box=False, show_txt=True,
center_thresh=0.5, img_id='det'):
self.imgs[img_id] = img
for cat in dets:
for i in range(len(dets[cat])):
cl = (self.colors[cat - 1, 0, 0]).tolist()
if dets[cat][i, -1] > center_thresh:
bbox = dets[cat][i, 1:5]
self.add_coco_bbox(
bbox, cat - 1, dets[cat][i, -1],
show_txt=show_txt, img_id=img_id)
def add_bird_view(self, dets, center_thresh=0.3, img_id='bird'):
bird_view = np.ones((self.out_size, self.out_size, 3), dtype=np.uint8) * 230
for cat in dets:
cl = (self.colors[cat - 1, 0, 0]).tolist()
lc = (250, 152, 12)
for i in range(len(dets[cat])):
if dets[cat][i, -1] > center_thresh:
dim = dets[cat][i, 5:8]
loc = dets[cat][i, 8:11]
rot_y = dets[cat][i, 11]
rect = compute_box_3d(dim, loc, rot_y)[:4, [0, 2]]
for k in range(4):
rect[k] = self.project_3d_to_bird(rect[k])
# cv2.circle(bird_view, (rect[k][0], rect[k][1]), 2, lc, -1)
cv2.polylines(
bird_view,[rect.reshape(-1, 1, 2).astype(np.int32)],
True,lc,2,lineType=cv2.LINE_AA)
for e in [[0, 1]]:
t = 4 if e == [0, 1] else 1
cv2.line(bird_view, (rect[e[0]][0], rect[e[0]][1]),
(rect[e[1]][0], rect[e[1]][1]), lc, t,
lineType=cv2.LINE_AA)
self.imgs[img_id] = bird_view
def add_bird_views(self, dets_dt, dets_gt, center_thresh=0.3, img_id='bird'):
alpha = 0.5
bird_view = np.ones((self.out_size, self.out_size, 3), dtype=np.uint8) * 230
for ii, (dets, lc, cc) in enumerate(
[(dets_gt, (12, 49, 250), (0, 0, 255)),
(dets_dt, (250, 152, 12), (255, 0, 0))]):
# cc = np.array(lc, dtype=np.uint8).reshape(1, 1, 3)
for cat in dets:
cl = (self.colors[cat - 1, 0, 0]).tolist()
for i in range(len(dets[cat])):
if dets[cat][i, -1] > center_thresh:
dim = dets[cat][i, 5:8]
loc = dets[cat][i, 8:11]
rot_y = dets[cat][i, 11]
rect = compute_box_3d(dim, loc, rot_y)[:4, [0, 2]]
for k in range(4):
rect[k] = self.project_3d_to_bird(rect[k])
if ii == 0:
cv2.fillPoly(
bird_view,[rect.reshape(-1, 1, 2).astype(np.int32)],
lc,lineType=cv2.LINE_AA)
else:
cv2.polylines(
bird_view,[rect.reshape(-1, 1, 2).astype(np.int32)],
True,lc,2,lineType=cv2.LINE_AA)
# for e in [[0, 1], [1, 2], [2, 3], [3, 0]]:
for e in [[0, 1]]:
t = 4 if e == [0, 1] else 1
cv2.line(bird_view, (rect[e[0]][0], rect[e[0]][1]),
(rect[e[1]][0], rect[e[1]][1]), lc, t,
lineType=cv2.LINE_AA)
self.imgs[img_id] = bird_view
kitti_class_name = [
'p', 'v', 'b'
]
gta_class_name = [
'p', 'v'
]
pascal_class_name = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
"car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
"person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
coco_class_name = [
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
color_list = np.array(
[
1.000, 1.000, 1.000,
0.850, 0.325, 0.098,
0.929, 0.694, 0.125,
0.494, 0.184, 0.556,
0.466, 0.674, 0.188,
0.301, 0.745, 0.933,
0.635, 0.078, 0.184,
0.300, 0.300, 0.300,
0.600, 0.600, 0.600,
1.000, 0.000, 0.000,
1.000, 0.500, 0.000,
0.749, 0.749, 0.000,
0.000, 1.000, 0.000,
0.000, 0.000, 1.000,
0.667, 0.000, 1.000,
0.333, 0.333, 0.000,
0.333, 0.667, 0.000,
0.333, 1.000, 0.000,
0.667, 0.333, 0.000,
0.667, 0.667, 0.000,
0.667, 1.000, 0.000,
1.000, 0.333, 0.000,
1.000, 0.667, 0.000,
1.000, 1.000, 0.000,
0.000, 0.333, 0.500,
0.000, 0.667, 0.500,
0.000, 1.000, 0.500,
0.333, 0.000, 0.500,
0.333, 0.333, 0.500,
0.333, 0.667, 0.500,
0.333, 1.000, 0.500,
0.667, 0.000, 0.500,
0.667, 0.333, 0.500,
0.667, 0.667, 0.500,
0.667, 1.000, 0.500,
1.000, 0.000, 0.500,
1.000, 0.333, 0.500,
1.000, 0.667, 0.500,
1.000, 1.000, 0.500,
0.000, 0.333, 1.000,
0.000, 0.667, 1.000,
0.000, 1.000, 1.000,
0.333, 0.000, 1.000,
0.333, 0.333, 1.000,
0.333, 0.667, 1.000,
0.333, 1.000, 1.000,
0.667, 0.000, 1.000,
0.667, 0.333, 1.000,
0.667, 0.667, 1.000,
0.667, 1.000, 1.000,
1.000, 0.000, 1.000,
1.000, 0.333, 1.000,
1.000, 0.667, 1.000,
0.167, 0.000, 0.000,
0.333, 0.000, 0.000,
0.500, 0.000, 0.000,
0.667, 0.000, 0.000,
0.833, 0.000, 0.000,
1.000, 0.000, 0.000,
0.000, 0.167, 0.000,
0.000, 0.333, 0.000,
0.000, 0.500, 0.000,
0.000, 0.667, 0.000,
0.000, 0.833, 0.000,
0.000, 1.000, 0.000,
0.000, 0.000, 0.167,
0.000, 0.000, 0.333,
0.000, 0.000, 0.500,
0.000, 0.000, 0.667,
0.000, 0.000, 0.833,
0.000, 0.000, 1.000,
0.000, 0.000, 0.000,
0.143, 0.143, 0.143,
0.286, 0.286, 0.286,
0.429, 0.429, 0.429,
0.571, 0.571, 0.571,
0.714, 0.714, 0.714,
0.857, 0.857, 0.857,
0.000, 0.447, 0.741,
0.50, 0.5, 0
]
).astype(np.float32)
color_list = color_list.reshape((-1, 3)) * 255
================================================
FILE: src/lib/utils/image.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Xingyi Zhou
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
import random
def flip(img):
return img[:, :, ::-1].copy()
def transform_preds(coords, center, scale, output_size):
target_coords = np.zeros(coords.shape)
trans = get_affine_transform(center, scale, 0, output_size, inv=1)
for p in range(coords.shape[0]):
target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
return target_coords
def get_affine_transform(center,
scale,
rot,
output_size,
shift=np.array([0, 0], dtype=np.float32),
inv=0):
if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
scale = np.array([scale, scale], dtype=np.float32)
scale_tmp = scale
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]
rot_rad = np.pi * rot / 180
src_dir = get_dir([0, src_w * -0.5], rot_rad)
dst_dir = np.array([0, dst_w * -0.5], np.float32)
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir
src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
def affine_transform(pt, t):
new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
new_pt = np.dot(t, new_pt)
return new_pt[:2]
def get_3rd_point(a, b):
direct = a - b
return b + np.array([-direct[1], direct[0]], dtype=np.float32)
def get_dir(src_point, rot_rad):
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
src_result = [0, 0]
src_result[0] = src_point[0] * cs - src_point[1] * sn
src_result[1] = src_point[0] * sn + src_point[1] * cs
return src_result
def crop(img, center, scale, output_size, rot=0):
trans = get_affine_transform(center, scale, rot, output_size)
dst_img = cv2.warpAffine(img,
trans,
(int(output_size[0]), int(output_size[1])),
flags=cv2.INTER_LINEAR)
return dst_img
def gaussian_radius(det_size, min_overlap=0.7):
height, width = det_size
a1 = 1
b1 = (height + width)
c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
r1 = (b1 + sq1) / 2
a2 = 4
b2 = 2 * (height + width)
c2 = (1 - min_overlap) * width * height
sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
r2 = (b2 + sq2) / 2
a3 = 4 * min_overlap
b3 = -2 * min_overlap * (height + width)
c3 = (min_overlap - 1) * width * height
sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
r3 = (b3 + sq3) / 2
return min(r1, r2, r3)
def gaussian2D(shape, sigma=1):
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m+1,-n:n+1]
h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h
def draw_umich_gaussian(heatmap, center, radius, k=1):
diameter = 2 * radius + 1
gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
return heatmap
def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False):
diameter = 2 * radius + 1
gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
value = np.array(value, dtype=np.float32).reshape(-1, 1, 1)
dim = value.shape[0]
reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value
if is_offset and dim == 2:
delta = np.arange(diameter*2+1) - radius
reg[0] = reg[0] - delta.reshape(1, -1)
reg[1] = reg[1] - delta.reshape(-1, 1)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom,
radius - left:radius + right]
masked_reg = reg[:, radius - top:radius + bottom,
radius - left:radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
idx = (masked_gaussian >= masked_heatmap).reshape(
1, masked_gaussian.shape[0], masked_gaussian.shape[1])
masked_regmap = (1-idx) * masked_regmap + idx * masked_reg
regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap
return regmap
def draw_msra_gaussian(heatmap, center, sigma):
tmp_size = sigma * 3
mu_x = int(center[0] + 0.5)
mu_y = int(center[1] + 0.5)
w, h = heatmap.shape[0], heatmap.shape[1]
ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0:
return heatmap
size = 2 * tmp_size + 1
x = np.arange(0, size, 1, np.float32)
y = x[:, np.newaxis]
x0 = y0 = size // 2
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
g_x = max(0, -ul[0]), min(br[0], h) - ul[0]
g_y = max(0, -ul[1]), min(br[1], w) - ul[1]
img_x = max(0, ul[0]), min(br[0], h)
img_y = max(0, ul[1]), min(br[1], w)
heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum(
heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]],
g[g_y[0]:g_y[1], g_x[0]:g_x[1]])
return heatmap
def grayscale(image):
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
def lighting_(data_rng, image, alphastd, eigval, eigvec):
alpha = data_rng.normal(scale=alphastd, size=(3, ))
image += np.dot(eigvec, eigval * alpha)
def blend_(alpha, image1, image2):
image1 *= alpha
image2 *= (1 - alpha)
image1 += image2
def saturation_(data_rng, image, gs, gs_mean, var):
alpha = 1. + data_rng.uniform(low=-var, high=var)
blend_(alpha, image, gs[:, :, None])
def brightness_(data_rng, image, gs, gs_mean, var):
alpha = 1. + data_rng.uniform(low=-var, high=var)
image *= alpha
def contrast_(data_rng, image, gs, gs_mean, var):
alpha = 1. + data_rng.uniform(low=-var, high=var)
blend_(alpha, image, gs_mean)
def color_aug(data_rng, image, eig_val, eig_vec):
functions = [brightness_, contrast_, saturation_]
random.shuffle(functions)
gs = grayscale(image)
gs_mean = gs.mean()
for f in functions:
f(data_rng, image, gs, gs_mean, 0.4)
lighting_(data_rng, image, 0.1, eig_val, eig_vec)
================================================
FILE: src/lib/utils/oracle_utils.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numba
@numba.jit(nopython=True, nogil=True)
def gen_oracle_map(feat, ind, w, h):
# feat: B x maxN x featDim
# ind: B x maxN
batch_size = feat.shape[0]
max_objs = feat.shape[1]
feat_dim = feat.shape[2]
out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32)
vis = np.zeros((batch_size, h, w), dtype=np.uint8)
ds = [(0, 1), (0, -1), (1, 0), (-1, 0)]
for i in range(batch_size):
queue_ind = np.zeros((h*w*2, 2), dtype=np.int32)
queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32)
head, tail = 0, 0
for j in range(max_objs):
if ind[i][j] > 0:
x, y = ind[i][j] % w, ind[i][j] // w
out[i, :, y, x] = feat[i][j]
vis[i, y, x] = 1
queue_ind[tail] = x, y
queue_feat[tail] = feat[i][j]
tail += 1
while tail - head > 0:
x, y = queue_ind[head]
f = queue_feat[head]
head += 1
for (dx, dy) in ds:
xx, yy = x + dx, y + dy
if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1:
out[i, :, yy, xx] = f
vis[i, yy, xx] = 1
queue_ind[tail] = xx, yy
queue_feat[tail] = f
tail += 1
return out
================================================
FILE: src/lib/utils/post_process.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from .image import transform_preds
from .ddd_utils import ddd2locrot
def get_pred_depth(depth):
return depth
def get_alpha(rot):
# output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
# bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
# return rot[:, 0]
idx = rot[:, 1] > rot[:, 5]
alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi)
alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi)
return alpha1 * idx + alpha2 * (1 - idx)
def ddd_post_process_2d(dets, c, s, opt):
# dets: batch x max_dets x dim
# return 1-based class det list
ret = []
include_wh = dets.shape[2] > 16
for i in range(dets.shape[0]):
top_preds = {}
dets[i, :, :2] = transform_preds(
dets[i, :, 0:2], c[i], s[i], (opt.output_w, opt.output_h))
classes = dets[i, :, -1]
for j in range(opt.num_classes):
inds = (classes == j)
top_preds[j + 1] = np.concatenate([
dets[i, inds, :3].astype(np.float32),
get_alpha(dets[i, inds, 3:11])[:, np.newaxis].astype(np.float32),
get_pred_depth(dets[i, inds, 11:12]).astype(np.float32),
dets[i, inds, 12:15].astype(np.float32)], axis=1)
if include_wh:
top_preds[j + 1] = np.concatenate([
top_preds[j + 1],
transform_preds(
dets[i, inds, 15:17], c[i], s[i], (opt.output_w, opt.output_h))
.astype(np.float32)], axis=1)
ret.append(top_preds)
return ret
def ddd_post_process_3d(dets, calibs):
# dets: batch x max_dets x dim
# return 1-based class det list
ret = []
for i in range(len(dets)):
preds = {}
for cls_ind in dets[i].keys():
preds[cls_ind] = []
for j in range(len(dets[i][cls_ind])):
center = dets[i][cls_ind][j][:2]
score = dets[i][cls_ind][j][2]
alpha = dets[i][cls_ind][j][3]
depth = dets[i][cls_ind][j][4]
dimensions = dets[i][cls_ind][j][5:8]
wh = dets[i][cls_ind][j][8:10]
locations, rotation_y = ddd2locrot(
center, alpha, dimensions, depth, calibs[0])
bbox = [center[0] - wh[0] / 2, center[1] - wh[1] / 2,
center[0] + wh[0] / 2, center[1] + wh[1] / 2]
pred = [alpha] + bbox + dimensions.tolist() + \
locations.tolist() + [rotation_y, score]
preds[cls_ind].append(pred)
preds[cls_ind] = np.array(preds[cls_ind], dtype=np.float32)
ret.append(preds)
return ret
def ddd_post_process(dets, c, s, calibs, opt):
# dets: batch x max_dets x dim
# return 1-based class det list
dets = ddd_post_process_2d(dets, c, s, opt)
dets = ddd_post_process_3d(dets, calibs)
return dets
def ctdet_post_process(dets, c, s, h, w, num_classes):
# dets: batch x max_dets x dim
# return 1-based class det dict
ret = []
for i in range(dets.shape[0]):
top_preds = {}
dets[i, :, :2] = transform_preds(
dets[i, :, 0:2], c[i], s[i], (w, h))
dets[i, :, 2:4] = transform_preds(
dets[i, :, 2:4], c[i], s[i], (w, h))
classes = dets[i, :, -1]
for j in range(num_classes):
inds = (classes == j)
top_preds[j + 1] = np.concatenate([
dets[i, inds, :4].astype(np.float32),
dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist()
ret.append(top_preds)
return ret
def multi_pose_post_process(dets, c, s, h, w):
# dets: batch x max_dets x 40
# return list of 39 in image coord
ret = []
for i in range(dets.shape[0]):
bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h))
pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h))
top_preds = np.concatenate(
[bbox.reshape(-1, 4), dets[i, :, 4:5],
pts.reshape(-1, 34)], axis=1).astype(np.float32).tolist()
ret.append({np.ones(1, dtype=np.int32)[0]: top_preds})
return ret
================================================
FILE: src/lib/utils/utils.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
if self.count > 0:
self.avg = self.sum / self.count
================================================
FILE: src/main.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import torch
import torch.utils.data
from opts import opts
from models.model import create_model, load_model, save_model
from models.data_parallel import DataParallel
from logger import Logger
from datasets.dataset_factory import get_dataset
from trains.train_factory import train_factory
def main(opt):
torch.manual_seed(opt.seed)
torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
Dataset = get_dataset(opt.dataset, opt.task)
opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
print(opt)
logger = Logger(opt)
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
print('Creating model...')
model = create_model(opt.arch, opt.heads, opt.head_conv)
optimizer = torch.optim.Adam(model.parameters(), opt.lr)
start_epoch = 0
if opt.load_model != '':
model, optimizer, start_epoch = load_model(
model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step)
Trainer = train_factory[opt.task]
trainer = Trainer(opt, model, optimizer)
trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
print('Setting up data...')
val_loader = torch.utils.data.DataLoader(
Dataset(opt, 'val'),
batch_size=1,
shuffle=False,
num_workers=1,
pin_memory=True
)
if opt.test:
_, preds = trainer.val(0, val_loader)
val_loader.dataset.run_eval(preds, opt.save_dir)
return
train_loader = torch.utils.data.DataLoader(
Dataset(opt, 'train'),
batch_size=opt.batch_size,
shuffle=True,
num_workers=opt.num_workers,
pin_memory=True,
drop_last=True
)
print('Starting training...')
best = 1e10
for epoch in range(start_epoch + 1, opt.num_epochs + 1):
mark = epoch if opt.save_all else 'last'
log_dict_train, _ = trainer.train(epoch, train_loader)
logger.write('epoch: {} |'.format(epoch))
for k, v in log_dict_train.items():
logger.scalar_summary('train_{}'.format(k), v, epoch)
logger.write('{} {:8f} | '.format(k, v))
if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
epoch, model, optimizer)
with torch.no_grad():
log_dict_val, preds = trainer.val(epoch, val_loader)
for k, v in log_dict_val.items():
logger.scalar_summary('val_{}'.format(k), v, epoch)
logger.write('{} {:8f} | '.format(k, v))
if log_dict_val[opt.metric] < best:
best = log_dict_val[opt.metric]
save_model(os.path.join(opt.save_dir, 'model_best.pth'),
epoch, model)
else:
save_model(os.path.join(opt.save_dir, 'model_last.pth'),
epoch, model, optimizer)
logger.write('\n')
if epoch in opt.lr_step:
save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
epoch, model, optimizer)
lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
print('Drop LR to', lr)
for param_group in optimizer.param_groups:
param_group['lr'] = lr
logger.close()
if __name__ == '__main__':
opt = opts().parse()
main(opt)
================================================
FILE: src/test.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import json
import cv2
import numpy as np
import time
from progress.bar import Bar
import torch
from external.nms import soft_nms
from opts import opts
from logger import Logger
from utils.utils import AverageMeter
from datasets.dataset_factory import dataset_factory
from detectors.detector_factory import detector_factory
class PrefetchDataset(torch.utils.data.Dataset):
def __init__(self, opt, dataset, pre_process_func):
self.images = dataset.images
self.load_image_func = dataset.coco.loadImgs
self.img_dir = dataset.img_dir
self.pre_process_func = pre_process_func
self.opt = opt
def __getitem__(self, index):
img_id = self.images[index]
img_info = self.load_image_func(ids=[img_id])[0]
img_path = os.path.join(self.img_dir, img_info['file_name'])
image = cv2.imread(img_path)
images, meta = {}, {}
for scale in opt.test_scales:
if opt.task == 'ddd':
images[scale], meta[scale] = self.pre_process_func(
image, scale, img_info['calib'])
else:
images[scale], meta[scale] = self.pre_process_func(image, scale)
return img_id, {'images': images, 'image': image, 'meta': meta}
def __len__(self):
return len(self.images)
def prefetch_test(opt):
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
Dataset = dataset_factory[opt.dataset]
opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
print(opt)
Logger(opt)
Detector = detector_factory[opt.task]
split = 'val' if not opt.trainval else 'test'
dataset = Dataset(opt, split)
detector = Detector(opt)
data_loader = torch.utils.data.DataLoader(
PrefetchDataset(opt, dataset, detector.pre_process),
batch_size=1, shuffle=False, num_workers=1, pin_memory=True)
results = {}
num_iters = len(dataset)
bar = Bar('{}'.format(opt.exp_id), max=num_iters)
time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
avg_time_stats = {t: AverageMeter() for t in time_stats}
for ind, (img_id, pre_processed_images) in enumerate(data_loader):
ret = detector.run(pre_processed_images)
results[img_id.numpy().astype(np.int32)[0]] = ret['results']
Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
for t in avg_time_stats:
avg_time_stats[t].update(ret[t])
Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format(
t, tm = avg_time_stats[t])
bar.next()
bar.finish()
dataset.run_eval(results, opt.save_dir)
def test(opt):
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
Dataset = dataset_factory[opt.dataset]
opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
print(opt)
Logger(opt)
Detector = detector_factory[opt.task]
split = 'val' if not opt.trainval else 'test'
dataset = Dataset(opt, split)
detector = Detector(opt)
results = {}
num_iters = len(dataset)
bar = Bar('{}'.format(opt.exp_id), max=num_iters)
time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
avg_time_stats = {t: AverageMeter() for t in time_stats}
for ind in range(num_iters):
img_id = dataset.images[ind]
img_info = dataset.coco.loadImgs(ids=[img_id])[0]
img_path = os.path.join(dataset.img_dir, img_info['file_name'])
if opt.task == 'ddd':
ret = detector.run(img_path, img_info['calib'])
else:
ret = detector.run(img_path)
results[img_id] = ret['results']
Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
for t in avg_time_stats:
avg_time_stats[t].update(ret[t])
Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg)
bar.next()
bar.finish()
dataset.run_eval(results, opt.save_dir)
if __name__ == '__main__':
opt = opts().parse()
if opt.not_prefetch_test:
test(opt)
else:
prefetch_test(opt)
================================================
FILE: src/tools/_init_paths.py
================================================
import os.path as osp
import sys
def add_path(path):
if path not in sys.path:
sys.path.insert(0, path)
this_dir = osp.dirname(__file__)
# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, '../lib')
add_path(lib_path)
================================================
FILE: src/tools/calc_coco_overlap.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as COCO
import cv2
import numpy as np
from pycocotools import mask as maskUtils
ANN_PATH = '../../data/coco/annotations/'
IMG_PATH = '../../data/coco/'
ANN_FILES = {'train': 'instances_train2017.json',
'val': 'instances_val2017.json'}
DEBUG = False
RESIZE = True
class_name = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
def iou(box1, box2):
area1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
area2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)
inter = max(min(box1[2], box2[2]) - max(box1[0], box2[0]) + 1, 0) * \
max(min(box1[3], box2[3]) - max(box1[1], box2[1]) + 1, 0)
iou = 1.0 * inter / (area1 + area2 - inter)
return iou
def generate_anchors(
stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
"""Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return _generate_anchors(
stride,
np.array(sizes, dtype=np.float) / stride,
np.array(aspect_ratios, dtype=np.float)
)
def _generate_anchors(base_size, scales, aspect_ratios):
"""Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
"""
anchor = np.array([1, 1, base_size, base_size], dtype=np.float) - 1
anchors = _ratio_enum(anchor, aspect_ratios)
anchors = np.vstack(
[_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
)
return anchors
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack(
(
x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)
)
)
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _scale_enum(anchor, scales):
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _coco_box_to_bbox(box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def count_agnostic(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
for img_id in images:
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
centers = []
for ann in anns:
bbox = ann['bbox']
center = ((bbox[0] + bbox[2] / 2) // 4, (bbox[1] + bbox[3] / 2) // 4)
for c in centers:
if center[0] == c[0] and center[1] == c[1]:
cnt += 1
centers.append(center)
print('find {} collisions!'.format(cnt))
def count(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
obj = 0
for img_id in images:
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
centers = []
obj += len(anns)
for ann in anns:
if ann['iscrowd'] > 0:
continue
bbox = ann['bbox']
center = ((bbox[0] + bbox[2] / 2) // 4, (bbox[1] + bbox[3] / 2) // 4, ann['category_id'], bbox)
for c in centers:
if center[0] == c[0] and center[1] == c[1] and center[2] == c[2] and \
iou(_coco_box_to_bbox(bbox), _coco_box_to_bbox(c[3])) < 2:# 0.5:
cnt += 1
if DEBUG:
file_name = coco.loadImgs(ids=[img_id])[0]['file_name']
img = cv2.imread('{}/{}2017/{}'.format(IMG_PATH, split, file_name))
x1, y1 = int(c[3][0]), int(c[3][1]),
x2, y2 = int(c[3][0] + c[3][2]), int(c[3][1] + c[3][3])
cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2, cv2.LINE_AA)
x1, y1 = int(center[3][0]), int(center[3][1]),
x2, y2 = int(center[3][0] + center[3][2]), int(center[3][1] + center[3][3])
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2, cv2.LINE_AA)
cv2.imshow('img', img)
cv2.waitKey()
centers.append(center)
print('find {} collisions of {} objects!'.format(cnt, obj))
def count_iou(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
obj = 0
for img_id in images:
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
bboxes = []
obj += len(anns)
for ann in anns:
if ann['iscrowd'] > 0:
continue
bbox = _coco_box_to_bbox(ann['bbox']).tolist() + [ann['category_id']]
for b in bboxes:
if iou(b, bbox) > 0.5 and b[4] == bbox[4]:
cnt += 1
if DEBUG:
file_name = coco.loadImgs(ids=[img_id])[0]['file_name']
img = cv2.imread('{}/{}2017/{}'.format(IMG_PATH, split, file_name))
x1, y1 = int(b[0]), int(b[1]),
x2, y2 = int(b[2]), int(b[3])
cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2, cv2.LINE_AA)
x1, y1 = int(bbox[0]), int(bbox[1]),
x2, y2 = int(bbox[2]), int(bbox[3])
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2, cv2.LINE_AA)
cv2.imshow('img', img)
print('cats', class_name[b[4]], class_name[bbox[4]])
cv2.waitKey()
bboxes.append(bbox)
print('find {} collisions of {} objects!'.format(cnt, obj))
def count_anchor(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
obj = 0
stride = 16
anchor = generate_anchors().reshape(15, 2, 2)
miss_s, miss_m, miss_l = 0, 0, 0
N = len(images)
print(N, 'images')
for ind, img_id in enumerate(images):
if ind % 1000 == 0:
print(ind, N)
anchors = []
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
obj += len(anns)
img_info = coco.loadImgs(ids=[img_id])[0]
h, w = img_info['height'], img_info['width']
if RESIZE:
if h > w:
for i in range(len(anns)):
anns[i]['bbox'][0] *= 800 / w
anns[i]['bbox'][1] *= 800 / w
anns[i]['bbox'][2] *= 800 / w
anns[i]['bbox'][3] *= 800 / w
h = h * 800 // w
w = 800
else:
for i in range(len(anns)):
anns[i]['bbox'][0] *= 800 / h
anns[i]['bbox'][1] *= 800 / h
anns[i]['bbox'][2] *= 800 / h
anns[i]['bbox'][3] *= 800 / h
w = w * 800 // h
h = 800
for i in range(w // stride):
for j in range(h // stride):
ct = np.array([i * stride, j * stride], dtype=np.float32).reshape(1, 1, 2)
anchors.append(anchor + ct)
anchors = np.concatenate(anchors, axis=0).reshape(-1, 4)
anchors[:, 2:4] = anchors[:, 2:4] - anchors[:, 0:2]
anchors = anchors.tolist()
# import pdb; pdb.set_trace()
g = [g['bbox'] for g in anns]
iscrowd = [int(o['iscrowd']) for o in anns]
ious = maskUtils.iou(anchors,g,iscrowd)
for t in range(len(g)):
if ious[:, t].max() < 0.5:
s = anns[t]['area']
if s < 32 ** 2:
miss_s += 1
elif s < 96 ** 2:
miss_m += 1
else:
miss_l += 1
if DEBUG:
file_name = coco.loadImgs(ids=[img_id])[0]['file_name']
img = cv2.imread('{}/{}2017/{}'.format(IMG_PATH, split, file_name))
if RESIZE:
img = cv2.resize(img, (w, h))
for t, gt in enumerate(g):
if anns[t]['iscrowd'] > 0:
continue
x1, y1, x2, y2 = _coco_box_to_bbox(gt)
cl = (0, 0, 255) if ious[:, t].max() < 0.5 else (0, 255, 0)
cv2.rectangle(img, (x1, y1), (x2, y2), cl, 2, cv2.LINE_AA)
for k in range(len(anchors)):
if ious[k, t] > 0.5:
x1, y1, x2, y2 = _coco_box_to_bbox(anchors[k])
cl = (np.array([255, 0, 0]) * ious[k, t]).astype(np.int32).tolist()
cv2.rectangle(img, (x1, y1), (x2, y2), cl, 1, cv2.LINE_AA)
cv2.imshow('img', img)
cv2.waitKey()
miss = 0
if len(ious) > 0:
miss = (ious.max(axis=0) < 0.5).sum()
cnt += miss
print('cnt, obj, ratio ', cnt, obj, cnt / obj)
print('s, m, l ', miss_s, miss_m, miss_l)
# import pdb; pdb.set_trace()
def count_size(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
obj = 0
stride = 16
anchor = generate_anchors().reshape(15, 2, 2)
cnt_s, cnt_m, cnt_l = 0, 0, 0
N = len(images)
print(N, 'images')
for ind, img_id in enumerate(images):
anchors = []
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
obj += len(anns)
img_info = coco.loadImgs(ids=[img_id])[0]
for t in range(len(anns)):
if 1:
s = anns[t]['area']
if s < 32 ** 2:
cnt_s += 1
elif s < 96 ** 2:
cnt_m += 1
else:
cnt_l += 1
cnt += 1
print('cnt', cnt)
print('s, m, l ', cnt_s, cnt_m, cnt_l)
# count_iou('train')
# count_anchor('train')
# count('train')
count_size('train')
================================================
FILE: src/tools/convert_hourglass_weight.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
MODEL_PATH = '../../models/ExtremeNet_500000.pkl'
OUT_PATH = '../../models/ExtremeNet_500000.pth'
import torch
state_dict = torch.load(MODEL_PATH)
key_map = {'t_heats': 'hm_t', 'l_heats': 'hm_l', 'b_heats': 'hm_b', \
'r_heats': 'hm_r', 'ct_heats': 'hm_c', \
't_regrs': 'reg_t', 'l_regrs': 'reg_l', \
'b_regrs': 'reg_b', 'r_regrs': 'reg_r'}
out = {}
for k in state_dict.keys():
changed = False
for m in key_map.keys():
if m in k:
if 'ct_heats' in k and m == 't_heats':
continue
new_k = k.replace(m, key_map[m])
out[new_k] = state_dict[k]
changed = True
print('replace {} to {}'.format(k, new_k))
if not changed:
out[k] = state_dict[k]
data = {'epoch': 0,
'state_dict': out}
torch.save(data, OUT_PATH)
================================================
FILE: src/tools/convert_kitti_to_coco.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pickle
import json
import numpy as np
import cv2
DATA_PATH = '../../data/kitti/'
DEBUG = False
# VAL_PATH = DATA_PATH + 'training/label_val/'
import os
SPLITS = ['3dop', 'subcnn']
import _init_paths
from utils.ddd_utils import compute_box_3d, project_to_image, alpha2rot_y
from utils.ddd_utils import draw_box_3d, unproject_2d_to_3d
'''
#Values Name Description
----------------------------------------------------------------------------
1 type Describes the type of object: 'Car', 'Van', 'Truck',
'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
'Misc' or 'DontCare'
1 truncated Float from 0 (non-truncated) to 1 (truncated), where
truncated refers to the object leaving image boundaries
1 occluded Integer (0,1,2,3) indicating occlusion state:
0 = fully visible, 1 = partly occluded
2 = largely occluded, 3 = unknown
1 alpha Observation angle of object, ranging [-pi..pi]
4 bbox 2D bounding box of object in the image (0-based index):
contains left, top, right, bottom pixel coordinates
3 dimensions 3D object dimensions: height, width, length (in meters)
3 location 3D object location x,y,z in camera coordinates (in meters)
1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi]
1 score Only for results: Float, indicating confidence in
detection, needed for p/r curves, higher is better.
'''
def _bbox_to_coco_bbox(bbox):
return [(bbox[0]), (bbox[1]),
(bbox[2] - bbox[0]), (bbox[3] - bbox[1])]
def read_clib(calib_path):
f = open(calib_path, 'r')
for i, line in enumerate(f):
if i == 2:
calib = np.array(line[:-1].split(' ')[1:], dtype=np.float32)
calib = calib.reshape(3, 4)
return calib
cats = ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting',
'Tram', 'Misc', 'DontCare']
cat_ids = {cat: i + 1 for i, cat in enumerate(cats)}
# cat_info = [{"name": "pedestrian", "id": 1}, {"name": "vehicle", "id": 2}]
F = 721
H = 384 # 375
W = 1248 # 1242
EXT = [45.75, -0.34, 0.005]
CALIB = np.array([[F, 0, W / 2, EXT[0]], [0, F, H / 2, EXT[1]],
[0, 0, 1, EXT[2]]], dtype=np.float32)
cat_info = []
for i, cat in enumerate(cats):
cat_info.append({'name': cat, 'id': i + 1})
for SPLIT in SPLITS:
image_set_path = DATA_PATH + 'ImageSets_{}/'.format(SPLIT)
ann_dir = DATA_PATH + 'training/label_2/'
calib_dir = DATA_PATH + '{}/calib/'
splits = ['train', 'val']
# splits = ['trainval', 'test']
calib_type = {'train': 'training', 'val': 'training', 'trainval': 'training',
'test': 'testing'}
for split in splits:
ret = {'images': [], 'annotations': [], "categories": cat_info}
image_set = open(image_set_path + '{}.txt'.format(split), 'r')
image_to_id = {}
for line in image_set:
if line[-1] == '\n':
line = line[:-1]
image_id = int(line)
calib_path = calib_dir.format(calib_type[split]) + '{}.txt'.format(line)
calib = read_clib(calib_path)
image_info = {'file_name': '{}.png'.format(line),
'id': int(image_id),
'calib': calib.tolist()}
ret['images'].append(image_info)
if split == 'test':
continue
ann_path = ann_dir + '{}.txt'.format(line)
# if split == 'val':
# os.system('cp {} {}/'.format(ann_path, VAL_PATH))
anns = open(ann_path, 'r')
if DEBUG:
image = cv2.imread(
DATA_PATH + 'images/trainval/' + image_info['file_name'])
for ann_ind, txt in enumerate(anns):
tmp = txt[:-1].split(' ')
cat_id = cat_ids[tmp[0]]
truncated = int(float(tmp[1]))
occluded = int(tmp[2])
alpha = float(tmp[3])
bbox = [float(tmp[4]), float(tmp[5]), float(tmp[6]), float(tmp[7])]
dim = [float(tmp[8]), float(tmp[9]), float(tmp[10])]
location = [float(tmp[11]), float(tmp[12]), float(tmp[13])]
rotation_y = float(tmp[14])
ann = {'image_id': image_id,
'id': int(len(ret['annotations']) + 1),
'category_id': cat_id,
'dim': dim,
'bbox': _bbox_to_coco_bbox(bbox),
'depth': location[2],
'alpha': alpha,
'truncated': truncated,
'occluded': occluded,
'location': location,
'rotation_y': rotation_y}
ret['annotations'].append(ann)
if DEBUG and tmp[0] != 'DontCare':
box_3d = compute_box_3d(dim, location, rotation_y)
box_2d = project_to_image(box_3d, calib)
# print('box_2d', box_2d)
image = draw_box_3d(image, box_2d)
x = (bbox[0] + bbox[2]) / 2
'''
print('rot_y, alpha2rot_y, dlt', tmp[0],
rotation_y, alpha2rot_y(alpha, x, calib[0, 2], calib[0, 0]),
np.cos(
rotation_y - alpha2rot_y(alpha, x, calib[0, 2], calib[0, 0])))
'''
depth = np.array([location[2]], dtype=np.float32)
pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
dtype=np.float32)
pt_3d = unproject_2d_to_3d(pt_2d, depth, calib)
pt_3d[1] += dim[0] / 2
print('pt_3d', pt_3d)
print('location', location)
if DEBUG:
cv2.imshow('image', image)
cv2.waitKey()
print("# images: ", len(ret['images']))
print("# annotations: ", len(ret['annotations']))
# import pdb; pdb.set_trace()
out_path = '{}/annotations/kitti_{}_{}.json'.format(DATA_PATH, SPLIT, split)
json.dump(ret, open(out_path, 'w'))
================================================
FILE: src/tools/eval_coco.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import sys
import cv2
import numpy as np
import pickle
import os
this_dir = os.path.dirname(__file__)
ANN_PATH = this_dir + '../../data/coco/annotations/instances_val2017.json'
print(ANN_PATH)
if __name__ == '__main__':
pred_path = sys.argv[1]
coco = coco.COCO(ANN_PATH)
dets = coco.loadRes(pred_path)
img_ids = coco.getImgIds()
num_images = len(img_ids)
coco_eval = COCOeval(coco, dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
================================================
FILE: src/tools/eval_coco_hp.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import sys
import cv2
import numpy as np
import pickle
import os
this_dir = os.path.dirname(__file__)
ANN_PATH = this_dir + '../../data/coco/annotations/person_keypoints_val2017.json'
print(ANN_PATH)
if __name__ == '__main__':
pred_path = sys.argv[1]
coco = coco.COCO(ANN_PATH)
dets = coco.loadRes(pred_path)
img_ids = coco.getImgIds()
num_images = len(img_ids)
coco_eval = COCOeval(coco, dets, "keypoints")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_eval = COCOeval(coco, dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
================================================
FILE: src/tools/get_kitti.sh
================================================
mkdir kitti
cd kitti
wget http://www.cvlibs.net/download.php?file=data_object_image_2.zip
wget http://www.cvlibs.net/download.php?file=data_object_label_2.zip
wget http://www.cvlibs.net/download.php?file=data_object_calib.zip
unzip data_object_image_2.zip
unzip data_object_label_2.zip
unzip data_object_calib.zip
================================================
FILE: src/tools/get_pascal_voc.sh
================================================
mkdir voc
cd voc
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar
tar xvf VOCtrainval_06-Nov-2007.tar
tar xvf VOCtest_06-Nov-2007.tar
tar xvf VOCdevkit_08-Jun-2007.tar
tar xvf VOCtrainval_11-May-2012.tar
tar xvf VOCdevkit_18-May-2011.tar
rm VOCtrainval_06-Nov-2007.tar
rm VOCtest_06-Nov-2007.tar
rm VOCdevkit_08-Jun-2007.tar
rm VOCtrainval_11-May-2012.tar
rm VOCdevkit_18-May-2011.tar
mkdir images
cp VOCdevkit/VOC2007/JPEGImages/* images/
cp VOCdevkit/VOC2012/JPEGImages/* images/
wget https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip
unzip PASCAL_VOC.zip
rm PASCAL_VOC.zip
mv PASCAL_VOC annotations/
cd ..
python merge_pascal_json.py
================================================
FILE: src/tools/kitti_eval/README.md
================================================
# kitti_eval
`evaluate_object_3d_offline.cpp`evaluates your KITTI detection locally on your own computer using your validation data selected from KITTI training dataset, with the following metrics:
- overlap on image (AP)
- oriented overlap on image (AOS)
- overlap on ground-plane (AP)
- overlap in 3D (AP)
Compile `evaluate_object_3d_offline.cpp` with dependency of Boost and Linux `dirent.h` (You should already have it under most Linux).
Run the evalutaion by:
./evaluate_object_3d_offline groundtruth_dir result_dir
Note that you don't have to detect over all KITTI training data. The evaluator only evaluates samples whose result files exist.
### Updates
- June, 2017:
* Fixed the bug of detection box filtering based on min height according to KITTI's note on 25.04.2017.
================================================
FILE: src/tools/kitti_eval/evaluate_object_3d.cpp
================================================
// from https://github.com/prclibo/kitti_eval
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "mail.h"
BOOST_GEOMETRY_REGISTER_C_ARRAY_CS(cs::cartesian)
typedef boost::geometry::model::polygon > Polygon;
using namespace std;
/*=======================================================================
STATIC EVALUATION PARAMETERS
=======================================================================*/
// holds the number of test images on the server
const int32_t N_TESTIMAGES = 7518;
// easy, moderate and hard evaluation level
enum DIFFICULTY{EASY=0, MODERATE=1, HARD=2};
// evaluation metrics: image, ground or 3D
enum METRIC{IMAGE=0, GROUND=1, BOX3D=2};
// evaluation parameter
const int32_t MIN_HEIGHT[3] = {40, 25, 25}; // minimum height for evaluated groundtruth/detections
const int32_t MAX_OCCLUSION[3] = {0, 1, 2}; // maximum occlusion level of the groundtruth used for evaluation
const double MAX_TRUNCATION[3] = {0.15, 0.3, 0.5}; // maximum truncation level of the groundtruth used for evaluation
// evaluated object classes
enum CLASSES{CAR=0, PEDESTRIAN=1, CYCLIST=2};
const int NUM_CLASS = 3;
// parameters varying per class
vector CLASS_NAMES;
// the minimum overlap required for 2D evaluation on the image/ground plane and 3D evaluation
const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.5, 0.25, 0.25}, {0.5, 0.25, 0.25}};
// no. of recall steps that should be evaluated (discretized)
const double N_SAMPLE_PTS = 41;
// initialize class names
void initGlobals () {
CLASS_NAMES.push_back("car");
CLASS_NAMES.push_back("pedestrian");
CLASS_NAMES.push_back("cyclist");
}
/*=======================================================================
DATA TYPES FOR EVALUATION
=======================================================================*/
// holding data needed for precision-recall and precision-aos
struct tPrData {
vector v; // detection score for computing score thresholds
double similarity; // orientation similarity
int32_t tp; // true positives
int32_t fp; // false positives
int32_t fn; // false negatives
tPrData () :
similarity(0), tp(0), fp(0), fn(0) {}
};
// holding bounding boxes for ground truth and detections
struct tBox {
string type; // object type as car, pedestrian or cyclist,...
double x1; // left corner
double y1; // top corner
double x2; // right corner
double y2; // bottom corner
double alpha; // image orientation
tBox (string type, double x1,double y1,double x2,double y2,double alpha) :
type(type),x1(x1),y1(y1),x2(x2),y2(y2),alpha(alpha) {}
};
// holding ground truth data
struct tGroundtruth {
tBox box; // object type, box, orientation
double truncation; // truncation 0..1
int32_t occlusion; // occlusion 0,1,2 (non, partly, fully)
double ry;
double t1, t2, t3;
double h, w, l;
tGroundtruth () :
box(tBox("invalild",-1,-1,-1,-1,-10)),truncation(-1),occlusion(-1) {}
tGroundtruth (tBox box,double truncation,int32_t occlusion) :
box(box),truncation(truncation),occlusion(occlusion) {}
tGroundtruth (string type,double x1,double y1,double x2,double y2,double alpha,double truncation,int32_t occlusion) :
box(tBox(type,x1,y1,x2,y2,alpha)),truncation(truncation),occlusion(occlusion) {}
};
// holding detection data
struct tDetection {
tBox box; // object type, box, orientation
double thresh; // detection score
double ry;
double t1, t2, t3;
double h, w, l;
tDetection ():
box(tBox("invalid",-1,-1,-1,-1,-10)),thresh(-1000) {}
tDetection (tBox box,double thresh) :
box(box),thresh(thresh) {}
tDetection (string type,double x1,double y1,double x2,double y2,double alpha,double thresh) :
box(tBox(type,x1,y1,x2,y2,alpha)),thresh(thresh) {}
};
/*=======================================================================
FUNCTIONS TO LOAD DETECTION AND GROUND TRUTH DATA ONCE, SAVE RESULTS
=======================================================================*/
vector indices;
vector loadDetections(string file_name, bool &compute_aos,
vector &eval_image, vector &eval_ground,
vector &eval_3d, bool &success) {
// holds all detections (ignored detections are indicated by an index vector
vector detections;
FILE *fp = fopen(file_name.c_str(),"r");
if (!fp) {
success = false;
return detections;
}
while (!feof(fp)) {
tDetection d;
double trash;
char str[255];
if (fscanf(fp, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
str, &trash, &trash, &d.box.alpha, &d.box.x1, &d.box.y1,
&d.box.x2, &d.box.y2, &d.h, &d.w, &d.l, &d.t1, &d.t2, &d.t3,
&d.ry, &d.thresh)==16) {
// d.thresh = 1;
d.box.type = str;
detections.push_back(d);
// orientation=-10 is invalid, AOS is not evaluated if at least one orientation is invalid
if(d.box.alpha == -10)
compute_aos = false;
// a class is only evaluated if it is detected at least once
for (int c = 0; c < NUM_CLASS; c++) {
if (!strcasecmp(d.box.type.c_str(), CLASS_NAMES[c].c_str())) {
if (!eval_image[c] && d.box.x1 >= 0)
eval_image[c] = true;
if (!eval_ground[c] && d.t1 != -1000)
eval_ground[c] = true;
if (!eval_3d[c] && d.t2 != -1000)
eval_3d[c] = true;
break;
}
}
}
}
fclose(fp);
success = true;
return detections;
}
vector loadGroundtruth(string file_name,bool &success) {
// holds all ground truth (ignored ground truth is indicated by an index vector
vector groundtruth;
FILE *fp = fopen(file_name.c_str(),"r");
if (!fp) {
success = false;
return groundtruth;
}
while (!feof(fp)) {
tGroundtruth g;
char str[255];
if (fscanf(fp, "%s %lf %d %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
str, &g.truncation, &g.occlusion, &g.box.alpha,
&g.box.x1, &g.box.y1, &g.box.x2, &g.box.y2,
&g.h, &g.w, &g.l, &g.t1,
&g.t2, &g.t3, &g.ry )==15) {
g.box.type = str;
groundtruth.push_back(g);
}
}
fclose(fp);
success = true;
return groundtruth;
}
void saveStats (const vector &precision, const vector &aos, FILE *fp_det, FILE *fp_ori) {
// save precision to file
if(precision.empty())
return;
for (int32_t i=0; i
Polygon toPolygon(const T& g) {
using namespace boost::numeric::ublas;
using namespace boost::geometry;
matrix mref(2, 2);
mref(0, 0) = cos(g.ry); mref(0, 1) = sin(g.ry);
mref(1, 0) = -sin(g.ry); mref(1, 1) = cos(g.ry);
static int count = 0;
matrix corners(2, 4);
double data[] = {g.l / 2, g.l / 2, -g.l / 2, -g.l / 2,
g.w / 2, -g.w / 2, -g.w / 2, g.w / 2};
std::copy(data, data + 8, corners.data().begin());
matrix gc = prod(mref, corners);
for (int i = 0; i < 4; ++i) {
gc(0, i) += g.t1;
gc(1, i) += g.t3;
}
double points[][2] = {{gc(0, 0), gc(1, 0)},{gc(0, 1), gc(1, 1)},{gc(0, 2), gc(1, 2)},{gc(0, 3), gc(1, 3)},{gc(0, 0), gc(1, 0)}};
Polygon poly;
append(poly, points);
return poly;
}
// measure overlap between bird's eye view bounding boxes, parametrized by (ry, l, w, tx, tz)
inline double groundBoxOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) {
using namespace boost::geometry;
Polygon gp = toPolygon(g);
Polygon dp = toPolygon(d);
std::vector in, un;
intersection(gp, dp, in);
union_(gp, dp, un);
double inter_area = in.empty() ? 0 : area(in.front());
double union_area = area(un.front());
double o;
if(criterion==-1) // union
o = inter_area / union_area;
else if(criterion==0) // bbox_a
o = inter_area / area(dp);
else if(criterion==1) // bbox_b
o = inter_area / area(gp);
return o;
}
// measure overlap between 3D bounding boxes, parametrized by (ry, h, w, l, tx, ty, tz)
inline double box3DOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) {
using namespace boost::geometry;
Polygon gp = toPolygon(g);
Polygon dp = toPolygon(d);
std::vector in, un;
intersection(gp, dp, in);
union_(gp, dp, un);
double ymax = min(d.t2, g.t2);
double ymin = max(d.t2 - d.h, g.t2 - g.h);
double inter_area = in.empty() ? 0 : area(in.front());
double inter_vol = inter_area * max(0.0, ymax - ymin);
double det_vol = d.h * d.l * d.w;
double gt_vol = g.h * g.l * g.w;
double o;
if(criterion==-1) // union
o = inter_vol / (det_vol + gt_vol - inter_vol);
else if(criterion==0) // bbox_a
o = inter_vol / det_vol;
else if(criterion==1) // bbox_b
o = inter_vol / gt_vol;
return o;
}
vector getThresholds(vector &v, double n_groundtruth){
// holds scores needed to compute N_SAMPLE_PTS recall values
vector t;
// sort scores in descending order
// (highest score is assumed to give best/most confident detections)
sort(v.begin(), v.end(), greater());
// get scores for linearly spaced recall
double current_recall = 0;
for(int32_t i=0; i >, const vector &det, vector &ignored_gt, vector &dc, vector &ignored_det, int32_t &n_gt, DIFFICULTY difficulty){
// extract ground truth bounding boxes for current evaluation class
for(int32_t i=0;iMAX_OCCLUSION[difficulty] || gt[i].truncation>MAX_TRUNCATION[difficulty] || height >,
const vector &det, const vector &dc,
const vector &ignored_gt, const vector &ignored_det,
bool compute_fp, double (*boxoverlap)(tDetection, tGroundtruth, int32_t),
METRIC metric, bool compute_aos=false, double thresh=0, bool debug=false){
tPrData stat = tPrData();
const double NO_DETECTION = -10000000;
vector delta; // holds angular difference for TPs (needed for AOS evaluation)
vector assigned_detection; // holds wether a detection was assigned to a valid or ignored ground truth
assigned_detection.assign(det.size(), false);
vector ignored_threshold;
ignored_threshold.assign(det.size(), false); // holds detections with a threshold lower than thresh if FP are computed
// detections with a low score are ignored for computing precision (needs FP)
if(compute_fp)
for(int32_t i=0; i 0.5) (logical len(det))
=======================================================================*/
int32_t det_idx = -1;
double valid_detection = NO_DETECTION;
double max_overlap = 0;
// search for a possible detection
bool assigned_ignored_det = false;
for(int32_t j=0; jMIN_OVERLAP[metric][current_class] && det[j].thresh>valid_detection){
det_idx = j;
valid_detection = det[j].thresh;
}
// for computing pr curve values, the candidate with the greatest overlap is considered
// if the greatest overlap is an ignored detection (min_height), the overlapping detection is used
else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && (overlap>max_overlap || assigned_ignored_det) && ignored_det[j]==0){
max_overlap = overlap;
det_idx = j;
valid_detection = 1;
assigned_ignored_det = false;
}
else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && valid_detection==NO_DETECTION && ignored_det[j]==1){
det_idx = j;
valid_detection = 1;
assigned_ignored_det = true;
}
}
/*=======================================================================
compute TP, FP and FN
=======================================================================*/
// nothing was assigned to this valid ground truth
if(valid_detection==NO_DETECTION && ignored_gt[i]==0) {
stat.fn++;
}
// only evaluate valid ground truth <=> detection assignments (considering difficulty level)
else if(valid_detection!=NO_DETECTION && (ignored_gt[i]==1 || ignored_det[det_idx]==1))
assigned_detection[det_idx] = true;
// found a valid true positive
else if(valid_detection!=NO_DETECTION){
// write highest score to threshold vector
stat.tp++;
stat.v.push_back(det[det_idx].thresh);
// compute angular difference of detection and ground truth if valid detection orientation was provided
if(compute_aos)
delta.push_back(gt[i].box.alpha - det[det_idx].box.alpha);
// clean up
assigned_detection[det_idx] = true;
}
}
// if FP are requested, consider stuff area
if(compute_fp){
// count fp
for(int32_t i=0; iMIN_OVERLAP[metric][current_class]){
assigned_detection[j] = true;
nstuff++;
}
}
}
// FP = no. of all not to ground truth assigned detections - detections assigned to stuff areas
stat.fp -= nstuff;
// if all orientation values are valid, the AOS is computed
if(compute_aos){
vector tmp;
// FP have a similarity of 0, for all TP compute AOS
tmp.assign(stat.fp, 0);
for(int32_t i=0; i0 || stat.fp>0)
stat.similarity = accumulate(tmp.begin(), tmp.end(), 0.0);
// there was neither a FP nor a TP, so the similarity is ignored in the evaluation
else
stat.similarity = -1;
}
}
return stat;
}
/*=======================================================================
EVALUATE CLASS-WISE
=======================================================================*/
bool eval_class (FILE *fp_det, FILE *fp_ori, CLASSES current_class,
const vector< vector > &groundtruth,
const vector< vector > &detections, bool compute_aos,
double (*boxoverlap)(tDetection, tGroundtruth, int32_t),
vector &precision, vector &aos,
DIFFICULTY difficulty, METRIC metric) {
assert(groundtruth.size() == detections.size());
// init
int32_t n_gt=0; // total no. of gt (denominator of recall)
vector v, thresholds; // detection scores, evaluated for recall discretization
vector< vector > ignored_gt, ignored_det; // index of ignored gt detection for current class/difficulty
vector< vector > dontcare; // index of dontcare areas, included in ground truth
// for all test images do
for (int32_t i=0; i i_gt, i_det;
vector dc;
// only evaluate objects of current class and ignore occluded, truncated objects
cleanData(current_class, groundtruth[i], detections[i], i_gt, dc, i_det, n_gt, difficulty);
ignored_gt.push_back(i_gt);
ignored_det.push_back(i_det);
dontcare.push_back(dc);
// compute statistics to get recall values
tPrData pr_tmp = tPrData();
pr_tmp = computeStatistics(current_class, groundtruth[i], detections[i], dc, i_gt, i_det, false, boxoverlap, metric);
// add detection scores to vector over all images
for(int32_t j=0; j pr;
pr.assign(thresholds.size(),tPrData());
for (int32_t i=0; i recall;
precision.assign(N_SAMPLE_PTS, 0);
if(compute_aos)
aos.assign(N_SAMPLE_PTS, 0);
double r=0;
for (int32_t i=0; i vals[],bool is_aos){
char command[1024];
// save plot data to file
FILE *fp = fopen((dir_name + "/" + file_name + ".txt").c_str(),"w");
printf("save %s\n", (dir_name + "/" + file_name + ".txt").c_str());
for (int32_t i=0; i<(int)N_SAMPLE_PTS; i++)
fprintf(fp,"%f %f %f %f\n",(double)i/(N_SAMPLE_PTS-1.0),vals[0][i],vals[1][i],vals[2][i]);
fclose(fp);
// create png + eps
for (int32_t j=0; j<2; j++) {
// open file
FILE *fp = fopen((dir_name + "/" + file_name + ".gp").c_str(),"w");
// save gnuplot instructions
if (j==0) {
fprintf(fp,"set term png size 450,315 font \"Helvetica\" 11\n");
fprintf(fp,"set output \"%s.png\"\n",file_name.c_str());
} else {
fprintf(fp,"set term postscript eps enhanced color font \"Helvetica\" 20\n");
fprintf(fp,"set output \"%s.eps\"\n",file_name.c_str());
}
// set labels and ranges
fprintf(fp,"set size ratio 0.7\n");
fprintf(fp,"set xrange [0:1]\n");
fprintf(fp,"set yrange [0:1]\n");
fprintf(fp,"set xlabel \"Recall\"\n");
if (!is_aos) fprintf(fp,"set ylabel \"Precision\"\n");
else fprintf(fp,"set ylabel \"Orientation Similarity\"\n");
obj_type[0] = toupper(obj_type[0]);
fprintf(fp,"set title \"%s\"\n",obj_type.c_str());
// line width
int32_t lw = 5;
if (j==0) lw = 3;
// plot error curve
fprintf(fp,"plot ");
fprintf(fp,"\"%s.txt\" using 1:2 title 'Easy' with lines ls 1 lw %d,",file_name.c_str(),lw);
fprintf(fp,"\"%s.txt\" using 1:3 title 'Moderate' with lines ls 2 lw %d,",file_name.c_str(),lw);
fprintf(fp,"\"%s.txt\" using 1:4 title 'Hard' with lines ls 3 lw %d",file_name.c_str(),lw);
// close file
fclose(fp);
// run gnuplot => create png + eps
sprintf(command,"cd %s; gnuplot %s",dir_name.c_str(),(file_name + ".gp").c_str());
system(command);
}
// create pdf and crop
sprintf(command,"cd %s; ps2pdf %s.eps %s_large.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str());
system(command);
sprintf(command,"cd %s; pdfcrop %s_large.pdf %s.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str());
system(command);
sprintf(command,"cd %s; rm %s_large.pdf",dir_name.c_str(),file_name.c_str());
system(command);
}
bool eval(string result_sha,Mail* mail){
// set some global parameters
initGlobals();
// ground truth and result directories
string gt_dir = "data/object/label_2";
string result_dir = "results/" + result_sha;
string plot_dir = result_dir + "/plot";
// create output directories
system(("mkdir " + plot_dir).c_str());
// hold detections and ground truth in memory
vector< vector > groundtruth;
vector< vector > detections;
// holds wether orientation similarity shall be computed (might be set to false while loading detections)
// and which labels where provided by this submission
bool compute_aos=true;
vector eval_image(NUM_CLASS, false);
vector eval_ground(NUM_CLASS, false);
vector eval_3d(NUM_CLASS, false);
// for all images read groundtruth and detections
mail->msg("Loading detections...");
for (int32_t i=0; i gt = loadGroundtruth(gt_dir + "/" + file_name,gt_success);
vector det = loadDetections(result_dir + "/data/" + file_name,
compute_aos, eval_image, eval_ground, eval_3d, det_success);
groundtruth.push_back(gt);
detections.push_back(det);
// check for errors
if (!gt_success) {
mail->msg("ERROR: Couldn't read: %s of ground truth. Please write me an email!", file_name);
return false;
}
if (!det_success) {
mail->msg("ERROR: Couldn't read: %s", file_name);
return false;
}
}
mail->msg(" done.");
// holds pointers for result files
FILE *fp_det=0, *fp_ori=0;
// eval image 2D bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_image[c]) {
fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection.txt").c_str(), "w");
if(compute_aos)
fp_ori = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_orientation.txt").c_str(),"w");
vector precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[0], aos[0], EASY, IMAGE)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[1], aos[1], MODERATE, IMAGE)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[2], aos[2], HARD, IMAGE)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection", CLASS_NAMES[c], precision, 0);
if(compute_aos){
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_orientation", CLASS_NAMES[c], aos, 1);
fclose(fp_ori);
}
}
}
// don't evaluate AOS for birdview boxes and 3D boxes
compute_aos = false;
// eval bird's eye view bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_ground[c]) {
fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_ground.txt").c_str(), "w");
vector precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[0], aos[0], EASY, GROUND)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[1], aos[1], MODERATE, GROUND)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[2], aos[2], HARD, GROUND)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_ground", CLASS_NAMES[c], precision, 0);
}
}
// eval 3D bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_3d[c]) {
fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_3d.txt").c_str(), "w");
vector precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[0], aos[0], EASY, BOX3D)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[1], aos[1], MODERATE, BOX3D)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[2], aos[2], HARD, BOX3D)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_3d", CLASS_NAMES[c], precision, 0);
}
}
// success
return true;
}
int32_t main (int32_t argc,char *argv[]) {
// we need 2 or 4 arguments!
if (argc!=2 && argc!=4) {
cout << "Usage: ./eval_detection result_sha [user_sha email]" << endl;
return 1;
}
// read arguments
string result_sha = argv[1];
// init notification mail
Mail *mail;
if (argc==4) mail = new Mail(argv[3]);
else mail = new Mail();
mail->msg("Thank you for participating in our evaluation!");
// run evaluation
if (eval(result_sha,mail)) {
mail->msg("Your evaluation results are available at:");
mail->msg("http://www.cvlibs.net/datasets/kitti/user_submit_check_login.php?benchmark=object&user=%s&result=%s",argv[2], result_sha.c_str());
} else {
system(("rm -r results/" + result_sha).c_str());
mail->msg("An error occured while processing your results.");
mail->msg("Please make sure that the data in your zip archive has the right format!");
}
// send mail and exit
delete mail;
return 0;
}
================================================
FILE: src/tools/kitti_eval/evaluate_object_3d_offline.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "mail.h"
BOOST_GEOMETRY_REGISTER_C_ARRAY_CS(cs::cartesian)
typedef boost::geometry::model::polygon > Polygon;
using namespace std;
/*=======================================================================
STATIC EVALUATION PARAMETERS
=======================================================================*/
// holds the number of test images on the server
const int32_t N_TESTIMAGES = 7518;
// easy, moderate and hard evaluation level
enum DIFFICULTY{EASY=0, MODERATE=1, HARD=2};
// evaluation metrics: image, ground or 3D
enum METRIC{IMAGE=0, GROUND=1, BOX3D=2};
// evaluation parameter
const int32_t MIN_HEIGHT[3] = {40, 25, 25}; // minimum height for evaluated groundtruth/detections
const int32_t MAX_OCCLUSION[3] = {0, 1, 2}; // maximum occlusion level of the groundtruth used for evaluation
const double MAX_TRUNCATION[3] = {0.15, 0.3, 0.5}; // maximum truncation level of the groundtruth used for evaluation
// evaluated object classes
enum CLASSES{CAR=0, PEDESTRIAN=1, CYCLIST=2};
const int NUM_CLASS = 3;
// parameters varying per class
vector CLASS_NAMES;
// the minimum overlap required for 2D evaluation on the image/ground plane and 3D evaluation
const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.5, 0.25, 0.25}, {0.5, 0.25, 0.25}};
// const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.7, 0.5, 0.5}, {0.7, 0.5, 0.5}};
// no. of recall steps that should be evaluated (discretized)
const double N_SAMPLE_PTS = 41;
// initialize class names
void initGlobals () {
CLASS_NAMES.push_back("car");
CLASS_NAMES.push_back("pedestrian");
CLASS_NAMES.push_back("cyclist");
}
/*=======================================================================
DATA TYPES FOR EVALUATION
=======================================================================*/
// holding data needed for precision-recall and precision-aos
struct tPrData {
vector v; // detection score for computing score thresholds
double similarity; // orientation similarity
int32_t tp; // true positives
int32_t fp; // false positives
int32_t fn; // false negatives
tPrData () :
similarity(0), tp(0), fp(0), fn(0) {}
};
// holding bounding boxes for ground truth and detections
struct tBox {
string type; // object type as car, pedestrian or cyclist,...
double x1; // left corner
double y1; // top corner
double x2; // right corner
double y2; // bottom corner
double alpha; // image orientation
tBox (string type, double x1,double y1,double x2,double y2,double alpha) :
type(type),x1(x1),y1(y1),x2(x2),y2(y2),alpha(alpha) {}
};
// holding ground truth data
struct tGroundtruth {
tBox box; // object type, box, orientation
double truncation; // truncation 0..1
int32_t occlusion; // occlusion 0,1,2 (non, partly, fully)
double ry;
double t1, t2, t3;
double h, w, l;
tGroundtruth () :
box(tBox("invalild",-1,-1,-1,-1,-10)),truncation(-1),occlusion(-1) {}
tGroundtruth (tBox box,double truncation,int32_t occlusion) :
box(box),truncation(truncation),occlusion(occlusion) {}
tGroundtruth (string type,double x1,double y1,double x2,double y2,double alpha,double truncation,int32_t occlusion) :
box(tBox(type,x1,y1,x2,y2,alpha)),truncation(truncation),occlusion(occlusion) {}
};
// holding detection data
struct tDetection {
tBox box; // object type, box, orientation
double thresh; // detection score
double ry;
double t1, t2, t3;
double h, w, l;
tDetection ():
box(tBox("invalid",-1,-1,-1,-1,-10)),thresh(-1000) {}
tDetection (tBox box,double thresh) :
box(box),thresh(thresh) {}
tDetection (string type,double x1,double y1,double x2,double y2,double alpha,double thresh) :
box(tBox(type,x1,y1,x2,y2,alpha)),thresh(thresh) {}
};
/*=======================================================================
FUNCTIONS TO LOAD DETECTION AND GROUND TRUTH DATA ONCE, SAVE RESULTS
=======================================================================*/
vector indices;
vector loadDetections(string file_name, bool &compute_aos,
vector &eval_image, vector &eval_ground,
vector &eval_3d, bool &success) {
// holds all detections (ignored detections are indicated by an index vector
vector detections;
FILE *fp = fopen(file_name.c_str(),"r");
if (!fp) {
success = false;
return detections;
}
while (!feof(fp)) {
tDetection d;
double trash;
char str[255];
if (fscanf(fp, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
str, &trash, &trash, &d.box.alpha, &d.box.x1, &d.box.y1,
&d.box.x2, &d.box.y2, &d.h, &d.w, &d.l, &d.t1, &d.t2, &d.t3,
&d.ry, &d.thresh)==16) {
// d.thresh = 1;
d.box.type = str;
detections.push_back(d);
// orientation=-10 is invalid, AOS is not evaluated if at least one orientation is invalid
if(d.box.alpha == -10)
compute_aos = false;
// a class is only evaluated if it is detected at least once
for (int c = 0; c < NUM_CLASS; c++) {
if (!strcasecmp(d.box.type.c_str(), CLASS_NAMES[c].c_str())) {
if (!eval_image[c] && d.box.x1 >= 0)
eval_image[c] = true;
if (!eval_ground[c] && d.t1 != -1000)
eval_ground[c] = true;
if (!eval_3d[c] && d.t2 != -1000)
eval_3d[c] = true;
break;
}
}
}
}
fclose(fp);
success = true;
return detections;
}
vector loadGroundtruth(string file_name,bool &success) {
// holds all ground truth (ignored ground truth is indicated by an index vector
vector groundtruth;
FILE *fp = fopen(file_name.c_str(),"r");
if (!fp) {
success = false;
return groundtruth;
}
while (!feof(fp)) {
tGroundtruth g;
char str[255];
if (fscanf(fp, "%s %lf %d %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
str, &g.truncation, &g.occlusion, &g.box.alpha,
&g.box.x1, &g.box.y1, &g.box.x2, &g.box.y2,
&g.h, &g.w, &g.l, &g.t1,
&g.t2, &g.t3, &g.ry )==15) {
g.box.type = str;
groundtruth.push_back(g);
}
}
fclose(fp);
success = true;
return groundtruth;
}
void saveStats (const vector &precision, const vector &aos, FILE *fp_det, FILE *fp_ori) {
// save precision to file
if(precision.empty())
return;
for (int32_t i=0; i
Polygon toPolygon(const T& g) {
using namespace boost::numeric::ublas;
using namespace boost::geometry;
matrix mref(2, 2);
mref(0, 0) = cos(g.ry); mref(0, 1) = sin(g.ry);
mref(1, 0) = -sin(g.ry); mref(1, 1) = cos(g.ry);
static int count = 0;
matrix corners(2, 4);
double data[] = {g.l / 2, g.l / 2, -g.l / 2, -g.l / 2,
g.w / 2, -g.w / 2, -g.w / 2, g.w / 2};
std::copy(data, data + 8, corners.data().begin());
matrix gc = prod(mref, corners);
for (int i = 0; i < 4; ++i) {
gc(0, i) += g.t1;
gc(1, i) += g.t3;
}
double points[][2] = {{gc(0, 0), gc(1, 0)},{gc(0, 1), gc(1, 1)},{gc(0, 2), gc(1, 2)},{gc(0, 3), gc(1, 3)},{gc(0, 0), gc(1, 0)}};
Polygon poly;
append(poly, points);
return poly;
}
// measure overlap between bird's eye view bounding boxes, parametrized by (ry, l, w, tx, tz)
inline double groundBoxOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) {
using namespace boost::geometry;
Polygon gp = toPolygon(g);
Polygon dp = toPolygon(d);
std::vector in, un;
intersection(gp, dp, in);
union_(gp, dp, un);
double inter_area = in.empty() ? 0 : area(in.front());
double union_area = area(un.front());
double o;
if(criterion==-1) // union
o = inter_area / union_area;
else if(criterion==0) // bbox_a
o = inter_area / area(dp);
else if(criterion==1) // bbox_b
o = inter_area / area(gp);
return o;
}
// measure overlap between 3D bounding boxes, parametrized by (ry, h, w, l, tx, ty, tz)
inline double box3DOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) {
using namespace boost::geometry;
Polygon gp = toPolygon(g);
Polygon dp = toPolygon(d);
std::vector in, un;
intersection(gp, dp, in);
union_(gp, dp, un);
double ymax = min(d.t2, g.t2);
double ymin = max(d.t2 - d.h, g.t2 - g.h);
double inter_area = in.empty() ? 0 : area(in.front());
double inter_vol = inter_area * max(0.0, ymax - ymin);
double det_vol = d.h * d.l * d.w;
double gt_vol = g.h * g.l * g.w;
double o;
if(criterion==-1) // union
o = inter_vol / (det_vol + gt_vol - inter_vol);
else if(criterion==0) // bbox_a
o = inter_vol / det_vol;
else if(criterion==1) // bbox_b
o = inter_vol / gt_vol;
return o;
}
vector getThresholds(vector &v, double n_groundtruth){
// holds scores needed to compute N_SAMPLE_PTS recall values
vector t;
// sort scores in descending order
// (highest score is assumed to give best/most confident detections)
sort(v.begin(), v.end(), greater());
// get scores for linearly spaced recall
double current_recall = 0;
for(int32_t i=0; i >, const vector &det, vector &ignored_gt, vector &dc, vector &ignored_det, int32_t &n_gt, DIFFICULTY difficulty){
// extract ground truth bounding boxes for current evaluation class
for(int32_t i=0;iMAX_OCCLUSION[difficulty] || gt[i].truncation>MAX_TRUNCATION[difficulty] || height >,
const vector &det, const vector &dc,
const vector &ignored_gt, const vector &ignored_det,
bool compute_fp, double (*boxoverlap)(tDetection, tGroundtruth, int32_t),
METRIC metric, bool compute_aos=false, double thresh=0, bool debug=false){
tPrData stat = tPrData();
const double NO_DETECTION = -10000000;
vector delta; // holds angular difference for TPs (needed for AOS evaluation)
vector assigned_detection; // holds wether a detection was assigned to a valid or ignored ground truth
assigned_detection.assign(det.size(), false);
vector ignored_threshold;
ignored_threshold.assign(det.size(), false); // holds detections with a threshold lower than thresh if FP are computed
// detections with a low score are ignored for computing precision (needs FP)
if(compute_fp)
for(int32_t i=0; i 0.5) (logical len(det))
=======================================================================*/
int32_t det_idx = -1;
double valid_detection = NO_DETECTION;
double max_overlap = 0;
// search for a possible detection
bool assigned_ignored_det = false;
for(int32_t j=0; jMIN_OVERLAP[metric][current_class] && det[j].thresh>valid_detection){
det_idx = j;
valid_detection = det[j].thresh;
}
// for computing pr curve values, the candidate with the greatest overlap is considered
// if the greatest overlap is an ignored detection (min_height), the overlapping detection is used
else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && (overlap>max_overlap || assigned_ignored_det) && ignored_det[j]==0){
max_overlap = overlap;
det_idx = j;
valid_detection = 1;
assigned_ignored_det = false;
}
else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && valid_detection==NO_DETECTION && ignored_det[j]==1){
det_idx = j;
valid_detection = 1;
assigned_ignored_det = true;
}
}
/*=======================================================================
compute TP, FP and FN
=======================================================================*/
// nothing was assigned to this valid ground truth
if(valid_detection==NO_DETECTION && ignored_gt[i]==0) {
stat.fn++;
}
// only evaluate valid ground truth <=> detection assignments (considering difficulty level)
else if(valid_detection!=NO_DETECTION && (ignored_gt[i]==1 || ignored_det[det_idx]==1))
assigned_detection[det_idx] = true;
// found a valid true positive
else if(valid_detection!=NO_DETECTION){
// write highest score to threshold vector
stat.tp++;
stat.v.push_back(det[det_idx].thresh);
// compute angular difference of detection and ground truth if valid detection orientation was provided
if(compute_aos)
delta.push_back(gt[i].box.alpha - det[det_idx].box.alpha);
// clean up
assigned_detection[det_idx] = true;
}
}
// if FP are requested, consider stuff area
if(compute_fp){
// count fp
for(int32_t i=0; iMIN_OVERLAP[metric][current_class]){
assigned_detection[j] = true;
nstuff++;
}
}
}
// FP = no. of all not to ground truth assigned detections - detections assigned to stuff areas
stat.fp -= nstuff;
// if all orientation values are valid, the AOS is computed
if(compute_aos){
vector tmp;
// FP have a similarity of 0, for all TP compute AOS
tmp.assign(stat.fp, 0);
for(int32_t i=0; i0 || stat.fp>0)
stat.similarity = accumulate(tmp.begin(), tmp.end(), 0.0);
// there was neither a FP nor a TP, so the similarity is ignored in the evaluation
else
stat.similarity = -1;
}
}
return stat;
}
/*=======================================================================
EVALUATE CLASS-WISE
=======================================================================*/
bool eval_class (FILE *fp_det, FILE *fp_ori, CLASSES current_class,
const vector< vector > &groundtruth,
const vector< vector > &detections, bool compute_aos,
double (*boxoverlap)(tDetection, tGroundtruth, int32_t),
vector &precision, vector &aos,
DIFFICULTY difficulty, METRIC metric) {
assert(groundtruth.size() == detections.size());
// init
int32_t n_gt=0; // total no. of gt (denominator of recall)
vector v, thresholds; // detection scores, evaluated for recall discretization
vector< vector > ignored_gt, ignored_det; // index of ignored gt detection for current class/difficulty
vector< vector > dontcare; // index of dontcare areas, included in ground truth
// for all test images do
for (int32_t i=0; i i_gt, i_det;
vector dc;
// only evaluate objects of current class and ignore occluded, truncated objects
cleanData(current_class, groundtruth[i], detections[i], i_gt, dc, i_det, n_gt, difficulty);
ignored_gt.push_back(i_gt);
ignored_det.push_back(i_det);
dontcare.push_back(dc);
// compute statistics to get recall values
tPrData pr_tmp = tPrData();
pr_tmp = computeStatistics(current_class, groundtruth[i], detections[i], dc, i_gt, i_det, false, boxoverlap, metric);
// add detection scores to vector over all images
for(int32_t j=0; j pr;
pr.assign(thresholds.size(),tPrData());
for (int32_t i=0; i recall;
precision.assign(N_SAMPLE_PTS, 0);
if(compute_aos)
aos.assign(N_SAMPLE_PTS, 0);
double r=0;
for (int32_t i=0; i vals[],bool is_aos){
char command[1024];
// save plot data to file
FILE *fp = fopen((dir_name + "/" + file_name + ".txt").c_str(),"w");
printf("save %s\n", (dir_name + "/" + file_name + ".txt").c_str());
for (int32_t i=0; i<(int)N_SAMPLE_PTS; i++)
fprintf(fp,"%f %f %f %f\n",(double)i/(N_SAMPLE_PTS-1.0),vals[0][i],vals[1][i],vals[2][i]);
fclose(fp);
float sum[3] = {0, 0, 0};
for (int v = 0; v < 3; ++v)
for (int i = 0; i < vals[v].size(); i = i + 4)
sum[v] += vals[v][i];
printf("%s AP: %f %f %f\n", file_name.c_str(), sum[0] / 11 * 100, sum[1] / 11 * 100, sum[2] / 11 * 100);
// create png + eps
for (int32_t j=0; j<2; j++) {
// open file
FILE *fp = fopen((dir_name + "/" + file_name + ".gp").c_str(),"w");
// save gnuplot instructions
if (j==0) {
fprintf(fp,"set term png size 450,315 font \"Helvetica\" 11\n");
fprintf(fp,"set output \"%s.png\"\n",file_name.c_str());
} else {
fprintf(fp,"set term postscript eps enhanced color font \"Helvetica\" 20\n");
fprintf(fp,"set output \"%s.eps\"\n",file_name.c_str());
}
// set labels and ranges
fprintf(fp,"set size ratio 0.7\n");
fprintf(fp,"set xrange [0:1]\n");
fprintf(fp,"set yrange [0:1]\n");
fprintf(fp,"set xlabel \"Recall\"\n");
if (!is_aos) fprintf(fp,"set ylabel \"Precision\"\n");
else fprintf(fp,"set ylabel \"Orientation Similarity\"\n");
obj_type[0] = toupper(obj_type[0]);
fprintf(fp,"set title \"%s\"\n",obj_type.c_str());
// line width
int32_t lw = 5;
if (j==0) lw = 3;
// plot error curve
fprintf(fp,"plot ");
fprintf(fp,"\"%s.txt\" using 1:2 title 'Easy' with lines ls 1 lw %d,",file_name.c_str(),lw);
fprintf(fp,"\"%s.txt\" using 1:3 title 'Moderate' with lines ls 2 lw %d,",file_name.c_str(),lw);
fprintf(fp,"\"%s.txt\" using 1:4 title 'Hard' with lines ls 3 lw %d",file_name.c_str(),lw);
// close file
fclose(fp);
// run gnuplot => create png + eps
sprintf(command,"cd %s; gnuplot %s",dir_name.c_str(),(file_name + ".gp").c_str());
system(command);
}
// create pdf and crop
sprintf(command,"cd %s; ps2pdf %s.eps %s_large.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str());
system(command);
sprintf(command,"cd %s; pdfcrop %s_large.pdf %s.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str());
system(command);
sprintf(command,"cd %s; rm %s_large.pdf",dir_name.c_str(),file_name.c_str());
system(command);
}
vector getEvalIndices(const string& result_dir) {
DIR* dir;
dirent* entity;
dir = opendir(result_dir.c_str());
if (dir) {
while (entity = readdir(dir)) {
string path(entity->d_name);
int32_t len = path.size();
if (len < 10) continue;
int32_t index = atoi(path.substr(len - 10, 10).c_str());
indices.push_back(index);
}
}
return indices;
}
bool eval(string gt_dir, string result_dir, Mail* mail){
// set some global parameters
initGlobals();
// ground truth and result directories
// string gt_dir = "data/object/label_2";
// string result_dir = "results/" + result_sha;
string plot_dir = result_dir + "/../plot";
// create output directories
system(("mkdir " + plot_dir).c_str());
// hold detections and ground truth in memory
vector< vector > groundtruth;
vector< vector > detections;
// holds wether orientation similarity shall be computed (might be set to false while loading detections)
// and which labels where provided by this submission
bool compute_aos=true;
vector eval_image(NUM_CLASS, false);
vector eval_ground(NUM_CLASS, false);
vector eval_3d(NUM_CLASS, false);
// for all images read groundtruth and detections
mail->msg("Loading detections...");
std::vector indices = getEvalIndices(result_dir);
printf("number of files for evaluation: %d\n", (int)indices.size());
for (int32_t i=0; i gt = loadGroundtruth(gt_dir + "/" + file_name,gt_success);
vector det = loadDetections(result_dir + file_name,
compute_aos, eval_image, eval_ground, eval_3d, det_success);
groundtruth.push_back(gt);
detections.push_back(det);
// check for errors
if (!gt_success) {
mail->msg("ERROR: Couldn't read: %s of ground truth. Please write me an email!", file_name);
return false;
}
if (!det_success) {
mail->msg("ERROR: Couldn't read: %s", file_name);
return false;
}
}
mail->msg(" done.");
// holds pointers for result files
FILE *fp_det=0, *fp_ori=0;
// eval image 2D bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_image[c]) {
fp_det = fopen((result_dir + "/../stats_" + CLASS_NAMES[c] + "_detection.txt").c_str(), "w");
if(compute_aos)
fp_ori = fopen((result_dir + "/../stats_" + CLASS_NAMES[c] + "_orientation.txt").c_str(),"w");
vector precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[0], aos[0], EASY, IMAGE)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[1], aos[1], MODERATE, IMAGE)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[2], aos[2], HARD, IMAGE)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection", CLASS_NAMES[c], precision, 0);
if(compute_aos){
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_orientation", CLASS_NAMES[c], aos, 1);
fclose(fp_ori);
}
}
}
// don't evaluate AOS for birdview boxes and 3D boxes
compute_aos = false;
// eval bird's eye view bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_ground[c]) {
fp_det = fopen((result_dir + "/../stats_" + CLASS_NAMES[c] + "_detection_ground.txt").c_str(), "w");
vector precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[0], aos[0], EASY, GROUND)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[1], aos[1], MODERATE, GROUND)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[2], aos[2], HARD, GROUND)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_ground", CLASS_NAMES[c], precision, 0);
}
}
// eval 3D bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_3d[c]) {
fp_det = fopen((result_dir + "/../stats_" + CLASS_NAMES[c] + "_detection_3d.txt").c_str(), "w");
vector precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[0], aos[0], EASY, BOX3D)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[1], aos[1], MODERATE, BOX3D)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[2], aos[2], HARD, BOX3D)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_3d", CLASS_NAMES[c], precision, 0);
}
}
// success
return true;
}
int32_t main (int32_t argc,char *argv[]) {
// we need 2 or 4 arguments!
if (argc!=3) {
cout << "Usage: ./eval_detection_3d_offline gt_dir result_dir" << endl;
return 1;
}
// read arguments
string gt_dir = argv[1];
string result_dir = argv[2];
// init notification mail
Mail *mail;
mail = new Mail();
mail->msg("Thank you for participating in our evaluation!");
// run evaluation
if (eval(gt_dir, result_dir, mail)) {
mail->msg("Your evaluation results are available at:");
mail->msg(result_dir.c_str());
} else {
system(("rm -r " + result_dir + "/../plot").c_str());
mail->msg("An error occured while processing your results.");
}
// send mail and exit
delete mail;
return 0;
}
================================================
FILE: src/tools/kitti_eval/mail.h
================================================
#ifndef MAIL_H
#define MAIL_H
#include
#include
#include
class Mail {
public:
Mail (std::string email = "") {
if (email.compare("")) {
mail = popen("/usr/lib/sendmail -t -f noreply@cvlibs.net","w");
fprintf(mail,"To: %s\n", email.c_str());
fprintf(mail,"From: noreply@cvlibs.net\n");
fprintf(mail,"Subject: KITTI Evaluation Benchmark\n");
fprintf(mail,"\n\n");
} else {
mail = 0;
}
}
~Mail() {
if (mail) {
pclose(mail);
}
}
void msg (const char *format, ...) {
va_list args;
va_start(args,format);
if (mail) {
vfprintf(mail,format,args);
fprintf(mail,"\n");
}
vprintf(format,args);
printf("\n");
va_end(args);
}
private:
FILE *mail;
};
#endif
================================================
FILE: src/tools/merge_pascal_json.py
================================================
import json
# ANNOT_PATH = '/home/zxy/Datasets/VOC/annotations/'
ANNOT_PATH = 'voc/annotations/'
OUT_PATH = ANNOT_PATH
INPUT_FILES = ['pascal_train2012.json', 'pascal_val2012.json',
'pascal_train2007.json', 'pascal_val2007.json']
OUTPUT_FILE = 'pascal_trainval0712.json'
KEYS = ['images', 'type', 'annotations', 'categories']
MERGE_KEYS = ['images', 'annotations']
out = {}
tot_anns = 0
for i, file_name in enumerate(INPUT_FILES):
data = json.load(open(ANNOT_PATH + file_name, 'r'))
print('keys', data.keys())
if i == 0:
for key in KEYS:
out[key] = data[key]
print(file_name, key, len(data[key]))
else:
out['images'] += data['images']
for j in range(len(data['annotations'])):
data['annotations'][j]['id'] += tot_anns
out['annotations'] += data['annotations']
print(file_name, 'images', len(data['images']))
print(file_name, 'annotations', len(data['annotations']))
tot_anns = len(out['annotations'])
print('tot', len(out['annotations']))
json.dump(out, open(OUT_PATH + OUTPUT_FILE, 'w'))
================================================
FILE: src/tools/reval.py
================================================
#!/usr/bin/env python
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# Modified by Xingyi Zhou
# --------------------------------------------------------
# Reval = re-eval. Re-evaluate saved detections.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os.path as osp
sys.path.insert(0, osp.join(osp.dirname(__file__), 'voc_eval_lib'))
from model.test import apply_nms
from datasets.pascal_voc import pascal_voc
import pickle
import os, argparse
import numpy as np
import json
def parse_args():
"""
Parse input arguments
"""
parser = argparse.ArgumentParser(description='Re-evaluate results')
parser.add_argument('detection_file', type=str)
parser.add_argument('--output_dir', help='results directory', type=str)
parser.add_argument('--imdb', dest='imdb_name',
help='dataset to re-evaluate',
default='voc_2007_test', type=str)
parser.add_argument('--matlab', dest='matlab_eval',
help='use matlab for evaluation',
action='store_true')
parser.add_argument('--comp', dest='comp_mode', help='competition mode',
action='store_true')
parser.add_argument('--nms', dest='apply_nms', help='apply nms',
action='store_true')
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
return args
def from_dets(imdb_name, detection_file, args):
imdb = pascal_voc('test', '2007')
imdb.competition_mode(args.comp_mode)
imdb.config['matlab_eval'] = args.matlab_eval
with open(os.path.join(detection_file), 'rb') as f:
if 'json' in detection_file:
dets = json.load(f)
else:
dets = pickle.load(f, encoding='latin1')
# import pdb; pdb.set_trace()
if args.apply_nms:
print('Applying NMS to all detections')
test_nms = 0.3
nms_dets = apply_nms(dets, test_nms)
else:
nms_dets = dets
print('Evaluating detections')
imdb.evaluate_detections(nms_dets)
if __name__ == '__main__':
args = parse_args()
imdb_name = args.imdb_name
from_dets(imdb_name, args.detection_file, args)
================================================
FILE: src/tools/vis_pred.py
================================================
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import sys
import cv2
import numpy as np
import pickle
IMG_PATH = '../../data/coco/val2017/'
ANN_PATH = '../../data/coco/annotations/instances_val2017.json'
DEBUG = True
def _coco_box_to_bbox(box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.int32)
return bbox
_cat_ids = [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90
]
num_classes = 80
_classes = {
ind + 1: cat_id for ind, cat_id in enumerate(_cat_ids)
}
_to_order = {cat_id: ind for ind, cat_id in enumerate(_cat_ids)}
coco = coco.COCO(ANN_PATH)
CAT_NAMES = [coco.loadCats([_classes[i + 1]])[0]['name'] \
for i in range(num_classes)]
COLORS = [((np.random.random((3, )) * 0.6 + 0.4)*255).astype(np.uint8) \
for _ in range(num_classes)]
def add_box(image, bbox, sc, cat_id):
cat_id = _to_order[cat_id]
cat_name = CAT_NAMES[cat_id]
cat_size = cv2.getTextSize(cat_name + '0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
color = np.array(COLORS[cat_id]).astype(np.int32).tolist()
txt = '{}{:.0f}'.format(cat_name, sc * 10)
if bbox[1] - cat_size[1] - 2 < 0:
cv2.rectangle(image,
(bbox[0], bbox[1] + 2),
(bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
color, -1)
cv2.putText(image, txt,
(bbox[0], bbox[1] + cat_size[1] + 2),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
else:
cv2.rectangle(image,
(bbox[0], bbox[1] - cat_size[1] - 2),
(bbox[0] + cat_size[0], bbox[1] - 2),
color, -1)
cv2.putText(image, txt,
(bbox[0], bbox[1] - 2),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
cv2.rectangle(image,
(bbox[0], bbox[1]),
(bbox[2], bbox[3]),
color, 2)
return image
if __name__ == '__main__':
dets = []
img_ids = coco.getImgIds()
num_images = len(img_ids)
for k in range(1, len(sys.argv)):
pred_path = sys.argv[k]
dets.append(coco.loadRes(pred_path))
# import pdb; pdb.set_trace()
for i, img_id in enumerate(img_ids):
img_info = coco.loadImgs(ids=[img_id])[0]
img_path = IMG_PATH + img_info['file_name']
img = cv2.imread(img_path)
gt_ids = coco.getAnnIds(imgIds=[img_id])
gts = coco.loadAnns(gt_ids)
gt_img = img.copy()
for j, pred in enumerate(gts):
bbox = _coco_box_to_bbox(pred['bbox'])
cat_id = pred['category_id']
gt_img = add_box(gt_img, bbox, 0, cat_id)
for k in range(len(dets)):
pred_ids = dets[k].getAnnIds(imgIds=[img_id])
preds = dets[k].loadAnns(pred_ids)
pred_img = img.copy()
for j, pred in enumerate(preds):
bbox = _coco_box_to_bbox(pred['bbox'])
sc = pred['score']
cat_id = pred['category_id']
if sc > 0.2:
pred_img = add_box(pred_img, bbox, sc, cat_id)
cv2.imshow('pred{}'.format(k), pred_img)
# cv2.imwrite('vis/{}_pred{}.png'.format(i, k), pred_img)
cv2.imshow('gt', gt_img)
# cv2.imwrite('vis/{}_gt.png'.format(i), gt_img)
cv2.waitKey()
# coco_eval.evaluate()
# coco_eval.accumulate()
# coco_eval.summarize()
================================================
FILE: src/tools/voc_eval_lib/LICENSE
================================================
MIT License
Copyright (c) 2017 Xinlei Chen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: src/tools/voc_eval_lib/Makefile
================================================
all:
python setup.py build_ext --inplace
rm -rf build
clean:
rm -rf */*.pyc
rm -rf */*.so
================================================
FILE: src/tools/voc_eval_lib/__init__.py
================================================
================================================
FILE: src/tools/voc_eval_lib/datasets/__init__.py
================================================
================================================
FILE: src/tools/voc_eval_lib/datasets/bbox.pyx
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
DTYPE = np.float
ctypedef np.float_t DTYPE_t
def bbox_overlaps(
np.ndarray[DTYPE_t, ndim=2] boxes,
np.ndarray[DTYPE_t, ndim=2] query_boxes):
"""
Parameters
----------
boxes: (N, 4) ndarray of float
query_boxes: (K, 4) ndarray of float
Returns
-------
overlaps: (N, K) ndarray of overlap between boxes and query_boxes
"""
cdef unsigned int N = boxes.shape[0]
cdef unsigned int K = query_boxes.shape[0]
cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
cdef DTYPE_t iw, ih, box_area
cdef DTYPE_t ua
cdef unsigned int k, n
for k in range(K):
box_area = (
(query_boxes[k, 2] - query_boxes[k, 0] + 1) *
(query_boxes[k, 3] - query_boxes[k, 1] + 1)
)
for n in range(N):
iw = (
min(boxes[n, 2], query_boxes[k, 2]) -
max(boxes[n, 0], query_boxes[k, 0]) + 1
)
if iw > 0:
ih = (
min(boxes[n, 3], query_boxes[k, 3]) -
max(boxes[n, 1], query_boxes[k, 1]) + 1
)
if ih > 0:
ua = float(
(boxes[n, 2] - boxes[n, 0] + 1) *
(boxes[n, 3] - boxes[n, 1] + 1) +
box_area - iw * ih
)
overlaps[n, k] = iw * ih / ua
return overlaps
================================================
FILE: src/tools/voc_eval_lib/datasets/ds_utils.py
================================================
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def unique_boxes(boxes, scale=1.0):
"""Return indices of unique boxes."""
v = np.array([1, 1e3, 1e6, 1e9])
hashes = np.round(boxes * scale).dot(v)
_, index = np.unique(hashes, return_index=True)
return np.sort(index)
def xywh_to_xyxy(boxes):
"""Convert [x y w h] box format to [x1 y1 x2 y2] format."""
return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))
def xyxy_to_xywh(boxes):
"""Convert [x1 y1 x2 y2] box format to [x y w h] format."""
return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))
def validate_boxes(boxes, width=0, height=0):
"""Check that a set of boxes are valid."""
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
assert (x1 >= 0).all()
assert (y1 >= 0).all()
assert (x2 >= x1).all()
assert (y2 >= y1).all()
assert (x2 < width).all()
assert (y2 < height).all()
def filter_small_boxes(boxes, min_size):
w = boxes[:, 2] - boxes[:, 0]
h = boxes[:, 3] - boxes[:, 1]
keep = np.where((w >= min_size) & (h > min_size))[0]
return keep
================================================
FILE: src/tools/voc_eval_lib/datasets/imdb.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Xinlei Chen
# Modified by Xingyi Zhou
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path as osp
import PIL
# from utils.cython_bbox import bbox_overlaps
import numpy as np
import scipy.sparse
from model.config import cfg
def bbox_overlaps(box1, box2):
area1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
area2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)
inter = max(min(box1[2], box2[2]) - max(box1[0], box2[0]) + 1, 0) * \
max(min(box1[3], box2[3]) - max(box1[1], box2[1]) + 1, 0)
iou = 1.0 * inter / (area1 + area2 - inter)
return iou
class imdb(object):
"""Image database."""
def __init__(self, name, classes=None):
self._name = name
self._num_classes = 0
if not classes:
self._classes = []
else:
self._classes = classes
self._image_index = []
self._obj_proposer = 'gt'
self._roidb = None
self._roidb_handler = self.default_roidb
# Use this dict for storing dataset specific config options
self.config = {}
@property
def name(self):
return self._name
@property
def num_classes(self):
return len(self._classes)
@property
def classes(self):
return self._classes
@property
def image_index(self):
return self._image_index
@property
def roidb_handler(self):
return self._roidb_handler
@roidb_handler.setter
def roidb_handler(self, val):
self._roidb_handler = val
def set_proposal_method(self, method):
method = eval('self.' + method + '_roidb')
self.roidb_handler = method
@property
def roidb(self):
# A roidb is a list of dictionaries, each with the following keys:
# boxes
# gt_overlaps
# gt_classes
# flipped
if self._roidb is not None:
return self._roidb
self._roidb = self.roidb_handler()
return self._roidb
@property
def cache_path(self):
cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def num_images(self):
return len(self.image_index)
def image_path_at(self, i):
raise NotImplementedError
def default_roidb(self):
raise NotImplementedError
def evaluate_detections(self, all_boxes, output_dir=None):
"""
all_boxes is a list of length number-of-classes.
Each list element is a list of length number-of-images.
Each of those list elements is either an empty list []
or a numpy array of detection.
all_boxes[class][image] = [] or np.array of shape #dets x 5
"""
raise NotImplementedError
def _get_widths(self):
return [PIL.Image.open(self.image_path_at(i)).size[0]
for i in range(self.num_images)]
def append_flipped_images(self):
num_images = self.num_images
widths = self._get_widths()
for i in range(num_images):
boxes = self.roidb[i]['boxes'].copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
boxes[:, 0] = widths[i] - oldx2 - 1
boxes[:, 2] = widths[i] - oldx1 - 1
assert (boxes[:, 2] >= boxes[:, 0]).all()
entry = {'boxes': boxes,
'gt_overlaps': self.roidb[i]['gt_overlaps'],
'gt_classes': self.roidb[i]['gt_classes'],
'flipped': True}
self.roidb.append(entry)
self._image_index = self._image_index * 2
def evaluate_recall(self, candidate_boxes=None, thresholds=None,
area='all', limit=None):
"""Evaluate detection proposal recall metrics.
Returns:
results: dictionary of results with keys
'ar': average recall
'recalls': vector recalls at each IoU overlap threshold
'thresholds': vector of IoU overlap thresholds
'gt_overlaps': vector of all ground-truth overlaps
"""
# Record max overlap value for each gt box
# Return vector of overlap values
areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
'96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
area_ranges = [[0 ** 2, 1e5 ** 2], # all
[0 ** 2, 32 ** 2], # small
[32 ** 2, 96 ** 2], # medium
[96 ** 2, 1e5 ** 2], # large
[96 ** 2, 128 ** 2], # 96-128
[128 ** 2, 256 ** 2], # 128-256
[256 ** 2, 512 ** 2], # 256-512
[512 ** 2, 1e5 ** 2], # 512-inf
]
assert area in areas, 'unknown area range: {}'.format(area)
area_range = area_ranges[areas[area]]
gt_overlaps = np.zeros(0)
num_pos = 0
for i in range(self.num_images):
# Checking for max_overlaps == 1 avoids including crowd annotations
# (...pretty hacking :/)
max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
(max_gt_overlaps == 1))[0]
gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
gt_areas = self.roidb[i]['seg_areas'][gt_inds]
valid_gt_inds = np.where((gt_areas >= area_range[0]) &
(gt_areas <= area_range[1]))[0]
gt_boxes = gt_boxes[valid_gt_inds, :]
num_pos += len(valid_gt_inds)
if candidate_boxes is None:
# If candidate_boxes is not supplied, the default is to use the
# non-ground-truth boxes from this roidb
non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
boxes = self.roidb[i]['boxes'][non_gt_inds, :]
else:
boxes = candidate_boxes[i]
if boxes.shape[0] == 0:
continue
if limit is not None and boxes.shape[0] > limit:
boxes = boxes[:limit, :]
overlaps = bbox_overlaps(boxes.astype(np.float),
gt_boxes.astype(np.float))
_gt_overlaps = np.zeros((gt_boxes.shape[0]))
for j in range(gt_boxes.shape[0]):
# find which proposal box maximally covers each gt box
argmax_overlaps = overlaps.argmax(axis=0)
# and get the iou amount of coverage for each gt box
max_overlaps = overlaps.max(axis=0)
# find which gt box is 'best' covered (i.e. 'best' = most iou)
gt_ind = max_overlaps.argmax()
gt_ovr = max_overlaps.max()
assert (gt_ovr >= 0)
# find the proposal box that covers the best covered gt box
box_ind = argmax_overlaps[gt_ind]
# record the iou coverage of this gt box
_gt_overlaps[j] = overlaps[box_ind, gt_ind]
assert (_gt_overlaps[j] == gt_ovr)
# mark the proposal box and the gt box as used
overlaps[box_ind, :] = -1
overlaps[:, gt_ind] = -1
# append recorded iou coverage level
gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))
gt_overlaps = np.sort(gt_overlaps)
if thresholds is None:
step = 0.05
thresholds = np.arange(0.5, 0.95 + 1e-5, step)
recalls = np.zeros_like(thresholds)
# compute recall for each iou threshold
for i, t in enumerate(thresholds):
recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
# ar = 2 * np.trapz(recalls, thresholds)
ar = recalls.mean()
return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
'gt_overlaps': gt_overlaps}
def create_roidb_from_box_list(self, box_list, gt_roidb):
assert len(box_list) == self.num_images, \
'Number of boxes must match number of ground-truth images'
roidb = []
for i in range(self.num_images):
boxes = box_list[i]
num_boxes = boxes.shape[0]
overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
gt_boxes = gt_roidb[i]['boxes']
gt_classes = gt_roidb[i]['gt_classes']
gt_overlaps = bbox_overlaps(boxes.astype(np.float),
gt_boxes.astype(np.float))
argmaxes = gt_overlaps.argmax(axis=1)
maxes = gt_overlaps.max(axis=1)
I = np.where(maxes > 0)[0]
overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
overlaps = scipy.sparse.csr_matrix(overlaps)
roidb.append({
'boxes': boxes,
'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
'gt_overlaps': overlaps,
'flipped': False,
'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
})
return roidb
@staticmethod
def merge_roidbs(a, b):
assert len(a) == len(b)
for i in range(len(a)):
a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
b[i]['gt_classes']))
a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
b[i]['gt_overlaps']])
a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
b[i]['seg_areas']))
return a
def competition_mode(self, on):
"""Turn competition mode on or off."""
pass
================================================
FILE: src/tools/voc_eval_lib/datasets/pascal_voc.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from datasets.imdb import imdb
import datasets.ds_utils as ds_utils
import xml.etree.ElementTree as ET
import numpy as np
import scipy.sparse
import scipy.io as sio
# import utils.cython_bbox
import pickle
import subprocess
import uuid
from .voc_eval import voc_eval
from model.config import cfg
class pascal_voc(imdb):
def __init__(self, image_set, year, use_diff=False):
name = 'voc_' + year + '_' + image_set
if use_diff:
name += '_diff'
imdb.__init__(self, name)
self._year = year
self._image_set = image_set
self._devkit_path = self._get_default_path()
self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
self._classes = ('__background__', # always index 0
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor')
self._class_to_ind = dict(list(zip(self.classes, list(range(self.num_classes)))))
self._image_ext = '.jpg'
self._image_index = self._load_image_set_index()
# Default to roidb handler
self._roidb_handler = self.gt_roidb
self._salt = str(uuid.uuid4())
self._comp_id = 'comp4'
# PASCAL specific config options
self.config = {'cleanup': True,
'use_salt': True,
'use_diff': use_diff,
'matlab_eval': False,
'rpn_file': None}
assert os.path.exists(self._devkit_path), \
'VOCdevkit path does not exist: {}'.format(self._devkit_path)
assert os.path.exists(self._data_path), \
'Path does not exist: {}'.format(self._data_path)
def image_path_at(self, i):
"""
Return the absolute path to image i in the image sequence.
"""
return self.image_path_from_index(self._image_index[i])
def image_path_from_index(self, index):
"""
Construct an image path from the image's "index" identifier.
"""
image_path = os.path.join(self._data_path, 'JPEGImages',
index + self._image_ext)
assert os.path.exists(image_path), \
'Path does not exist: {}'.format(image_path)
return image_path
def _load_image_set_index(self):
"""
Load the indexes listed in this dataset's image set file.
"""
# Example path to image set file:
# self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt
image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
self._image_set + '.txt')
assert os.path.exists(image_set_file), \
'Path does not exist: {}'.format(image_set_file)
with open(image_set_file) as f:
image_index = [x.strip() for x in f.readlines()]
return image_index
def _get_default_path(self):
"""
Return the default path where PASCAL VOC is expected to be installed.
"""
return os.path.join(cfg.DATA_DIR, 'voc', 'VOCdevkit')
def gt_roidb(self):
"""
Return the database of ground-truth regions of interest.
This function loads/saves from/to a cache file to speed up future calls.
"""
cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
if os.path.exists(cache_file):
with open(cache_file, 'rb') as fid:
try:
roidb = pickle.load(fid)
except:
roidb = pickle.load(fid, encoding='bytes')
print('{} gt roidb loaded from {}'.format(self.name, cache_file))
return roidb
gt_roidb = [self._load_pascal_annotation(index)
for index in self.image_index]
with open(cache_file, 'wb') as fid:
pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
print('wrote gt roidb to {}'.format(cache_file))
return gt_roidb
def rpn_roidb(self):
if int(self._year) == 2007 or self._image_set != 'test':
gt_roidb = self.gt_roidb()
rpn_roidb = self._load_rpn_roidb(gt_roidb)
roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb)
else:
roidb = self._load_rpn_roidb(None)
return roidb
def _load_rpn_roidb(self, gt_roidb):
filename = self.config['rpn_file']
print('loading {}'.format(filename))
assert os.path.exists(filename), \
'rpn data not found at: {}'.format(filename)
with open(filename, 'rb') as f:
box_list = pickle.load(f)
return self.create_roidb_from_box_list(box_list, gt_roidb)
def _load_pascal_annotation(self, index):
"""
Load image and bounding boxes info from XML file in the PASCAL VOC
format.
"""
filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
tree = ET.parse(filename)
objs = tree.findall('object')
if not self.config['use_diff']:
# Exclude the samples labeled as difficult
non_diff_objs = [
obj for obj in objs if int(obj.find('difficult').text) == 0]
# if len(non_diff_objs) != len(objs):
# print 'Removed {} difficult objects'.format(
# len(objs) - len(non_diff_objs))
objs = non_diff_objs
num_objs = len(objs)
boxes = np.zeros((num_objs, 4), dtype=np.uint16)
gt_classes = np.zeros((num_objs), dtype=np.int32)
overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
# "Seg" area for pascal is just the box area
seg_areas = np.zeros((num_objs), dtype=np.float32)
# Load object bounding boxes into a data frame.
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
# Make pixel indexes 0-based
x1 = float(bbox.find('xmin').text) - 1
y1 = float(bbox.find('ymin').text) - 1
x2 = float(bbox.find('xmax').text) - 1
y2 = float(bbox.find('ymax').text) - 1
cls = self._class_to_ind[obj.find('name').text.lower().strip()]
boxes[ix, :] = [x1, y1, x2, y2]
gt_classes[ix] = cls
overlaps[ix, cls] = 1.0
seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)
overlaps = scipy.sparse.csr_matrix(overlaps)
return {'boxes': boxes,
'gt_classes': gt_classes,
'gt_overlaps': overlaps,
'flipped': False,
'seg_areas': seg_areas}
def _get_comp_id(self):
comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt']
else self._comp_id)
return comp_id
def _get_voc_results_file_template(self):
# VOCdevkit/results/VOC2007/Main/_det_test_aeroplane.txt
filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt'
path = os.path.join(
self._devkit_path,
'results',
'VOC' + self._year,
'Main',
filename)
return path
def _write_voc_results_file(self, all_boxes):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
# print('Writing {} VOC results file'.format(cls))
filename = self._get_voc_results_file_template().format(cls)
# print(filename)
with open(filename, 'wt') as f:
for im_ind, index in enumerate(self.image_index):
dets = np.array(all_boxes[cls_ind][im_ind])
if len(dets) == 0:
continue
# the VOCdevkit expects 1-based indices
for k in range(dets.shape[0]):
f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
format(index, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1))
def _do_python_eval(self, output_dir=None):
annopath = os.path.join(
self._devkit_path,
'VOC' + self._year,
'Annotations',
'{:s}.xml')
imagesetfile = os.path.join(
self._devkit_path,
'VOC' + self._year,
'ImageSets',
'Main',
self._image_set + '.txt')
cachedir = os.path.join(self._devkit_path, 'annotations_cache')
aps = []
# The PASCAL VOC metric changed in 2010
use_07_metric = True if int(self._year) < 2010 else False
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
if output_dir is not None and not os.path.isdir(output_dir):
os.mkdir(output_dir)
for i, cls in enumerate(self._classes):
if cls == '__background__':
continue
filename = self._get_voc_results_file_template().format(cls)
rec, prec, ap = voc_eval(
filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
use_07_metric=use_07_metric, use_diff=self.config['use_diff'])
aps += [ap]
print(('AP for {} = {:.4f}'.format(cls, ap)))
if output_dir is not None:
with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
print(('Mean AP = {:.4f}'.format(np.mean(aps))))
print('~~~~~~~~')
'''
print('Results:')
for ap in aps:
print(('{:.3f}'.format(ap)))
print(('{:.3f}'.format(np.mean(aps))))
print('~~~~~~~~')
print('')
print('--------------------------------------------------------------')
print('Results computed with the **unofficial** Python eval code.')
print('Results should be very close to the official MATLAB eval code.')
print('Recompute with `./tools/reval.py --matlab ...` for your paper.')
print('-- Thanks, The Management')
print('--------------------------------------------------------------')
'''
def _do_matlab_eval(self, output_dir='output'):
print('-----------------------------------------------------')
print('Computing results with the official MATLAB eval code.')
print('-----------------------------------------------------')
path = os.path.join(cfg.ROOT_DIR, 'lib', 'datasets',
'VOCdevkit-matlab-wrapper')
cmd = 'cd {} && '.format(path)
cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB)
cmd += '-r "dbstop if error; '
cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \
.format(self._devkit_path, self._get_comp_id(),
self._image_set, output_dir)
print(('Running:\n{}'.format(cmd)))
status = subprocess.call(cmd, shell=True)
def evaluate_detections(self, all_boxes, output_dir=None):
self._write_voc_results_file(all_boxes)
self._do_python_eval(output_dir)
if self.config['matlab_eval']:
self._do_matlab_eval(output_dir)
if self.config['cleanup']:
for cls in self._classes:
if cls == '__background__':
continue
filename = self._get_voc_results_file_template().format(cls)
os.remove(filename)
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
if __name__ == '__main__':
from datasets.pascal_voc import pascal_voc
d = pascal_voc('trainval', '2007')
res = d.roidb
from IPython import embed;
embed()
================================================
FILE: src/tools/voc_eval_lib/datasets/voc_eval.py
================================================
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import xml.etree.ElementTree as ET
import os
import pickle
import numpy as np
def parse_rec(filename):
""" Parse a PASCAL VOC xml file """
tree = ET.parse(filename)
objects = []
for obj in tree.findall('object'):
obj_struct = {}
obj_struct['name'] = obj.find('name').text
obj_struct['pose'] = obj.find('pose').text
obj_struct['truncated'] = int(obj.find('truncated').text)
obj_struct['difficult'] = int(obj.find('difficult').text)
bbox = obj.find('bndbox')
obj_struct['bbox'] = [int(bbox.find('xmin').text),
int(bbox.find('ymin').text),
int(bbox.find('xmax').text),
int(bbox.find('ymax').text)]
objects.append(obj_struct)
return objects
def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall.
If use_07_metric is true, uses the
VOC 07 11 point method (default:False).
"""
if use_07_metric:
# 11 point metric
ap = 0.
for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0:
p = 0
else:
p = np.max(prec[rec >= t])
# print(t, p)
ap = ap + p / 11.
else:
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
def voc_eval(detpath,
annopath,
imagesetfile,
classname,
cachedir,
ovthresh=0.5,
use_07_metric=False,
use_diff=False):
"""rec, prec, ap = voc_eval(detpath,
annopath,
imagesetfile,
classname,
[ovthresh],
[use_07_metric])
Top level function that does the PASCAL VOC evaluation.
detpath: Path to detections
detpath.format(classname) should produce the detection results file.
annopath: Path to annotations
annopath.format(imagename) should be the xml annotations file.
imagesetfile: Text file containing the list of images, one image per line.
classname: Category name (duh)
cachedir: Directory for caching the annotations
[ovthresh]: Overlap threshold (default = 0.5)
[use_07_metric]: Whether to use VOC07's 11 point AP computation
(default False)
"""
# assumes detections are in detpath.format(classname)
# assumes annotations are in annopath.format(imagename)
# assumes imagesetfile is a text file with each line an image name
# cachedir caches the annotations in a pickle file
# first load gt
if not os.path.isdir(cachedir):
os.mkdir(cachedir)
cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
# read list of images
with open(imagesetfile, 'r') as f:
lines = f.readlines()
imagenames = [x.strip() for x in lines]
if not os.path.isfile(cachefile):
# load annotations
recs = {}
for i, imagename in enumerate(imagenames):
recs[imagename] = parse_rec(annopath.format(imagename))
if i % 100 == 0:
print('Reading annotation for {:d}/{:d}'.format(
i + 1, len(imagenames)))
# save
print('Saving cached annotations to {:s}'.format(cachefile))
with open(cachefile, 'wb') as f:
pickle.dump(recs, f)
else:
# load
with open(cachefile, 'rb') as f:
try:
recs = pickle.load(f)
except:
recs = pickle.load(f, encoding='bytes')
# extract gt objects for this class
class_recs = {}
npos = 0
for imagename in imagenames:
R = [obj for obj in recs[imagename] if obj['name'] == classname]
bbox = np.array([x['bbox'] for x in R])
if use_diff:
difficult = np.array([False for x in R]).astype(np.bool)
else:
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
npos = npos + sum(~difficult)
class_recs[imagename] = {'bbox': bbox,
'difficult': difficult,
'det': det}
# read dets
detfile = detpath.format(classname)
with open(detfile, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
nd = len(image_ids)
tp = np.zeros(nd)
fp = np.zeros(nd)
if BB.shape[0] > 0:
# sort by confidence
sorted_ind = np.argsort(-confidence)
sorted_scores = np.sort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax = -np.inf
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# compute overlaps
# intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni
ovmax = np.max(overlaps)
jmax = np.argmax(overlaps)
if ovmax > ovthresh:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[d] = 1.
R['det'][jmax] = 1
else:
fp[d] = 1.
else:
fp[d] = 1.
# compute precision recall
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(npos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap
================================================
FILE: src/tools/voc_eval_lib/model/__init__.py
================================================
================================================
FILE: src/tools/voc_eval_lib/model/bbox_transform.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def bbox_transform(ex_rois, gt_rois):
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
targets_dw = np.log(gt_widths / ex_widths)
targets_dh = np.log(gt_heights / ex_heights)
targets = np.vstack(
(targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
return targets
def bbox_transform_inv(boxes, deltas):
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
widths = boxes[:, 2] - boxes[:, 0] + 1.0
heights = boxes[:, 3] - boxes[:, 1] + 1.0
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
dx = deltas[:, 0::4]
dy = deltas[:, 1::4]
dw = deltas[:, 2::4]
dh = deltas[:, 3::4]
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
# x1
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
# y1
pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
# x2
pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
# y2
pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
return pred_boxes
def clip_boxes(boxes, im_shape):
"""
Clip boxes to image boundaries.
"""
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
================================================
FILE: src/tools/voc_eval_lib/model/config.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path as osp
import numpy as np
# `pip install easydict` if you don't have it
from easydict import EasyDict as edict
__C = edict()
# Consumers can get config by:
# from fast_rcnn_config import cfg
cfg = __C
#
# Training options
#
__C.TRAIN = edict()
# Initial learning rate
__C.TRAIN.LEARNING_RATE = 0.001
# Momentum
__C.TRAIN.MOMENTUM = 0.9
# Weight decay, for regularization
__C.TRAIN.WEIGHT_DECAY = 0.0001
# Factor for reducing the learning rate
__C.TRAIN.GAMMA = 0.1
# Step size for reducing the learning rate, currently only support one step
__C.TRAIN.STEPSIZE = [30000]
# Iteration intervals for showing the loss during training, on command line interface
__C.TRAIN.DISPLAY = 10
# Whether to double the learning rate for bias
__C.TRAIN.DOUBLE_BIAS = True
# Whether to initialize the weights with truncated normal distribution
__C.TRAIN.TRUNCATED = False
# Whether to have weight decay on bias as well
__C.TRAIN.BIAS_DECAY = False
# Whether to add ground truth boxes to the pool when sampling regions
__C.TRAIN.USE_GT = False
# Whether to use aspect-ratio grouping of training images, introduced merely for saving
# GPU memory
__C.TRAIN.ASPECT_GROUPING = False
# The number of snapshots kept, older ones are deleted to save space
__C.TRAIN.SNAPSHOT_KEPT = 3
# The time interval for saving tensorflow summaries
__C.TRAIN.SUMMARY_INTERVAL = 180
# Scale to use during training (can list multiple scales)
# The scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
__C.TRAIN.MAX_SIZE = 1000
# Images to use per minibatch
__C.TRAIN.IMS_PER_BATCH = 1
# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128
# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.1
# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True
# Train bounding-box regressors
__C.TRAIN.BBOX_REG = True
# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5
# Iterations between snapshots
__C.TRAIN.SNAPSHOT_ITERS = 5000
# solver.prototxt specifies the snapshot path prefix, this adds an optional
# infix to yield the path: [_]_iters_XYZ.caffemodel
__C.TRAIN.SNAPSHOT_PREFIX = 'res101_faster_rcnn'
# Normalize the targets (subtract empirical mean, divide by empirical stddev)
__C.TRAIN.BBOX_NORMALIZE_TARGETS = True
# Deprecated (inside weights)
__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Normalize the targets using "precomputed" (or made up) means and stdevs
# (BBOX_NORMALIZE_TARGETS must also be True)
__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True
__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
# Train using these proposals
__C.TRAIN.PROPOSAL_METHOD = 'gt'
# Make minibatches from images that have similar aspect ratios (i.e. both
# tall and thin or both short and wide) in order to avoid wasting computation
# on zero-padding.
# Use RPN to detect objects
__C.TRAIN.HAS_RPN = True
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfied by positive and negative conditions set to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before apply NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Deprecated (outside weights)
__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Give the positive RPN examples weight of p * 1 / {num positives}
# and give negatives a weight of (1 - p)
# Set to -1.0 to use uniform example weighting
__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
# Whether to use all ground truth bounding boxes for training,
# For COCO, setting USE_ALL_GT to False will exclude boxes that are flagged as ''iscrowd''
__C.TRAIN.USE_ALL_GT = True
#
# Testing options
#
__C.TEST = edict()
# Scale to use during testing (can NOT list multiple scales)
# The scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
__C.TEST.MAX_SIZE = 1000
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3
# Experimental: treat the (K+1) units in the cls_score layer as linear
# predictors (trained, eg, with one-vs-rest SVMs).
__C.TEST.SVM = False
# Test using bounding-box regressors
__C.TEST.BBOX_REG = True
# Propose boxes
__C.TEST.HAS_RPN = False
# Test using these proposals
__C.TEST.PROPOSAL_METHOD = 'gt'
## NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before apply NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
# __C.TEST.RPN_MIN_SIZE = 16
# Testing mode, default to be 'nms', 'top' is slower but better
# See report for details
__C.TEST.MODE = 'nms'
# Only useful when TEST.MODE is 'top', specifies the number of top proposals to select
__C.TEST.RPN_TOP_N = 5000
#
# ResNet options
#
__C.RESNET = edict()
# Option to set if max-pooling is appended after crop_and_resize.
# if true, the region will be resized to a square of 2xPOOLING_SIZE,
# then 2x2 max-pooling is applied; otherwise the region will be directly
# resized to a square of POOLING_SIZE
__C.RESNET.MAX_POOL = False
# Number of fixed blocks during training, by default the first of all 4 blocks is fixed
# Range: 0 (none) to 3 (all)
__C.RESNET.FIXED_BLOCKS = 1
#
# MobileNet options
#
__C.MOBILENET = edict()
# Whether to regularize the depth-wise filters during training
__C.MOBILENET.REGU_DEPTH = False
# Number of fixed layers during training, by default the bottom 5 of 14 layers is fixed
# Range: 0 (none) to 12 (all)
__C.MOBILENET.FIXED_LAYERS = 5
# Weight decay for the mobilenet weights
__C.MOBILENET.WEIGHT_DECAY = 0.00004
# Depth multiplier
__C.MOBILENET.DEPTH_MULTIPLIER = 1.
#
# MISC
#
# Pixel mean values (BGR order) as a (1, 1, 3) array
# We use the same pixel mean for all networks even though it's not exactly what
# they were trained with
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
# For reproducibility
__C.RNG_SEED = 3
# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..', '..', '..'))
# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
# Name (or path to) the matlab executable
__C.MATLAB = 'matlab'
# Place outputs under an experiments directory
__C.EXP_DIR = 'default'
# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True
# Use an end-to-end tensorflow model.
# Note: models in E2E tensorflow mode have only been tested in feed-forward mode,
# but these models are exportable to other tensorflow instances as GraphDef files.
__C.USE_E2E_TF = True
# Default pooling mode, only 'crop' is available
__C.POOLING_MODE = 'crop'
# Size of the pooled region after RoI pooling
__C.POOLING_SIZE = 7
# Anchor scales for RPN
__C.ANCHOR_SCALES = [8,16,32]
# Anchor ratios for RPN
__C.ANCHOR_RATIOS = [0.5,1,2]
# Number of filters for the RPN layer
__C.RPN_CHANNELS = 512
def get_output_dir(imdb, weights_filename):
"""Return the directory where experimental artifacts are placed.
If the directory does not exist, it is created.
A canonical path is built using the name from an imdb and a network
(if not None).
"""
outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
if weights_filename is None:
weights_filename = 'default'
outdir = osp.join(outdir, weights_filename)
if not os.path.exists(outdir):
os.makedirs(outdir)
return outdir
def get_output_tb_dir(imdb, weights_filename):
"""Return the directory where tensorflow summaries are placed.
If the directory does not exist, it is created.
A canonical path is built using the name from an imdb and a network
(if not None).
"""
outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'tensorboard', __C.EXP_DIR, imdb.name))
if weights_filename is None:
weights_filename = 'default'
outdir = osp.join(outdir, weights_filename)
if not os.path.exists(outdir):
os.makedirs(outdir)
return outdir
def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if type(a) is not edict:
return
for k, v in a.items():
# a must specify keys that are in b
if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
old_type = type(b[k])
if old_type is not type(v):
if isinstance(b[k], np.ndarray):
v = np.array(v, dtype=b[k].dtype)
else:
raise ValueError(('Type mismatch ({} vs. {}) '
'for config key: {}').format(type(b[k]),
type(v), k))
# recursively merge dicts
if type(v) is edict:
try:
_merge_a_into_b(a[k], b[k])
except:
print(('Error under config key: {}'.format(k)))
raise
else:
b[k] = v
def cfg_from_file(filename):
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
yaml_cfg = edict(yaml.load(f))
_merge_a_into_b(yaml_cfg, __C)
def cfg_from_list(cfg_list):
"""Set config keys via list (e.g., from command line)."""
from ast import literal_eval
assert len(cfg_list) % 2 == 0
for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
key_list = k.split('.')
d = __C
for subkey in key_list[:-1]:
assert subkey in d
d = d[subkey]
subkey = key_list[-1]
assert subkey in d
try:
value = literal_eval(v)
except:
# handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'.format(
type(value), type(d[subkey]))
d[subkey] = value
================================================
FILE: src/tools/voc_eval_lib/model/nms_wrapper.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from model.config import cfg
from nms.gpu_nms import gpu_nms
from nms.cpu_nms import cpu_nms
def nms(dets, thresh, force_cpu=False):
"""Dispatch to either CPU or GPU NMS implementations."""
if dets.shape[0] == 0:
return []
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(dets, thresh, device_id=0)
else:
return cpu_nms(dets, thresh)
================================================
FILE: src/tools/voc_eval_lib/model/test.py
================================================
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle as pickle
except ImportError:
import pickle
import os
import math
from utils.timer import Timer
from utils.blob import im_list_to_blob
from model.config import cfg, get_output_dir
from model.bbox_transform import clip_boxes, bbox_transform_inv
# from model.nms_wrapper import nms # need to compile cython nms before import nms
nms = None # not needed in pascal evaluation
def _get_image_blob(im):
"""Converts an image into a network input.
Arguments:
im (ndarray): a color image in BGR order
Returns:
blob (ndarray): a data blob holding an image pyramid
im_scale_factors (list): list of image scales (relative to im) used
in the image pyramid
"""
im_orig = im.astype(np.float32, copy=True)
im_orig -= cfg.PIXEL_MEANS
im_shape = im_orig.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
processed_ims = []
im_scale_factors = []
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR)
im_scale_factors.append(im_scale)
processed_ims.append(im)
# Create a blob to hold the input images
blob = im_list_to_blob(processed_ims)
return blob, np.array(im_scale_factors)
def _get_blobs(im):
"""Convert an image and RoIs within that image into network inputs."""
blobs = {}
blobs['data'], im_scale_factors = _get_image_blob(im)
return blobs, im_scale_factors
def _clip_boxes(boxes, im_shape):
"""Clip boxes to image boundaries."""
# x1 >= 0
boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1)
# y2 < im_shape[0]
boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1)
return boxes
def _rescale_boxes(boxes, inds, scales):
"""Rescale boxes according to image rescaling."""
for i in range(boxes.shape[0]):
boxes[i,:] = boxes[i,:] / scales[int(inds[i])]
return boxes
def im_detect(sess, net, im):
blobs, im_scales = _get_blobs(im)
assert len(im_scales) == 1, "Only single-image batch implemented"
im_blob = blobs['data']
blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32)
_, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])
boxes = rois[:, 1:5] / im_scales[0]
scores = np.reshape(scores, [scores.shape[0], -1])
bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
if cfg.TEST.BBOX_REG:
# Apply bounding-box regression deltas
box_deltas = bbox_pred
pred_boxes = bbox_transform_inv(boxes, box_deltas)
pred_boxes = _clip_boxes(pred_boxes, im.shape)
else:
# Simply repeat the boxes, once for each class
pred_boxes = np.tile(boxes, (1, scores.shape[1]))
return scores, pred_boxes
def apply_nms(all_boxes, thresh):
"""Apply non-maximum suppression to all predicted boxes output by the
test_net method.
"""
num_classes = len(all_boxes)
num_images = len(all_boxes[0])
nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
for cls_ind in range(num_classes):
for im_ind in range(num_images):
dets = np.array(all_boxes[cls_ind][im_ind], dtype=np.float32)
if len(dets) == 0:
continue
#print('dets', dets)
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
scores = dets[:, 4]
inds = np.where((x2 > x1) & (y2 > y1))[0]
dets = dets[inds,:]
if dets == []:
continue
keep = nms(dets, thresh)
if len(keep) == 0:
continue
nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
return nms_boxes
def test_net(sess, net, imdb, weights_filename, max_per_image=100, thresh=0.):
np.random.seed(cfg.RNG_SEED)
"""Test a Fast R-CNN network on an image database."""
num_images = len(imdb.image_index)
# all detections are collected into:
# all_boxes[cls][image] = N x 5 array of detections in
# (x1, y1, x2, y2, score)
all_boxes = [[[] for _ in range(num_images)]
for _ in range(imdb.num_classes)]
output_dir = get_output_dir(imdb, weights_filename)
# timers
_t = {'im_detect' : Timer(), 'misc' : Timer()}
for i in range(num_images):
im = cv2.imread(imdb.image_path_at(i))
_t['im_detect'].tic()
scores, boxes = im_detect(sess, net, im)
_t['im_detect'].toc()
_t['misc'].tic()
# skip j = 0, because it's the background class
for j in range(1, imdb.num_classes):
inds = np.where(scores[:, j] > thresh)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
keep = nms(cls_dets, cfg.TEST.NMS)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
# Limit to max_per_image detections *over all classes*
if max_per_image > 0:
image_scores = np.hstack([all_boxes[j][i][:, -1]
for j in range(1, imdb.num_classes)])
if len(image_scores) > max_per_image:
image_thresh = np.sort(image_scores)[-max_per_image]
for j in range(1, imdb.num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
.format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time))
det_file = os.path.join(output_dir, 'detections.pkl')
with open(det_file, 'wb') as f:
pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
print('Evaluating detections')
imdb.evaluate_detections(all_boxes, output_dir)
================================================
FILE: src/tools/voc_eval_lib/nms/.gitignore
================================================
================================================
FILE: src/tools/voc_eval_lib/nms/__init__.py
================================================
================================================
FILE: src/tools/voc_eval_lib/nms/cpu_nms.c
================================================
/* Generated by Cython 0.20.1 on Wed Oct 5 13:15:30 2016 */
#define PY_SSIZE_T_CLEAN
#ifndef CYTHON_USE_PYLONG_INTERNALS
#ifdef PYLONG_BITS_IN_DIGIT
#define CYTHON_USE_PYLONG_INTERNALS 0
#else
#include "pyconfig.h"
#ifdef PYLONG_BITS_IN_DIGIT
#define CYTHON_USE_PYLONG_INTERNALS 1
#else
#define CYTHON_USE_PYLONG_INTERNALS 0
#endif
#endif
#endif
#include "Python.h"
#ifndef Py_PYTHON_H
#error Python headers needed to compile C extensions, please install development version of Python.
#elif PY_VERSION_HEX < 0x02040000
#error Cython requires Python 2.4+.
#else
#define CYTHON_ABI "0_20_1"
#include /* For offsetof */
#ifndef offsetof
#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
#endif
#if !defined(WIN32) && !defined(MS_WINDOWS)
#ifndef __stdcall
#define __stdcall
#endif
#ifndef __cdecl
#define __cdecl
#endif
#ifndef __fastcall
#define __fastcall
#endif
#endif
#ifndef DL_IMPORT
#define DL_IMPORT(t) t
#endif
#ifndef DL_EXPORT
#define DL_EXPORT(t) t
#endif
#ifndef PY_LONG_LONG
#define PY_LONG_LONG LONG_LONG
#endif
#ifndef Py_HUGE_VAL
#define Py_HUGE_VAL HUGE_VAL
#endif
#ifdef PYPY_VERSION
#define CYTHON_COMPILING_IN_PYPY 1
#define CYTHON_COMPILING_IN_CPYTHON 0
#else
#define CYTHON_COMPILING_IN_PYPY 0
#define CYTHON_COMPILING_IN_CPYTHON 1
#endif
#if CYTHON_COMPILING_IN_PYPY
#define Py_OptimizeFlag 0
#endif
#if PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PY_FORMAT_SIZE_T ""
#define CYTHON_FORMAT_SSIZE_T ""
#define PyInt_FromSsize_t(z) PyInt_FromLong(z)
#define PyInt_AsSsize_t(o) __Pyx_PyInt_As_int(o)
#define PyNumber_Index(o) ((PyNumber_Check(o) && !PyFloat_Check(o)) ? PyNumber_Int(o) : \
(PyErr_Format(PyExc_TypeError, \
"expected index value, got %.200s", Py_TYPE(o)->tp_name), \
(PyObject*)0))
#define __Pyx_PyIndex_Check(o) (PyNumber_Check(o) && !PyFloat_Check(o) && \
!PyComplex_Check(o))
#define PyIndex_Check __Pyx_PyIndex_Check
#define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)
#define __PYX_BUILD_PY_SSIZE_T "i"
#else
#define __PYX_BUILD_PY_SSIZE_T "n"
#define CYTHON_FORMAT_SSIZE_T "z"
#define __Pyx_PyIndex_Check PyIndex_Check
#endif
#if PY_VERSION_HEX < 0x02060000
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
#define PyVarObject_HEAD_INIT(type, size) \
PyObject_HEAD_INIT(type) size,
#define PyType_Modified(t)
typedef struct {
void *buf;
PyObject *obj;
Py_ssize_t len;
Py_ssize_t itemsize;
int readonly;
int ndim;
char *format;
Py_ssize_t *shape;
Py_ssize_t *strides;
Py_ssize_t *suboffsets;
void *internal;
} Py_buffer;
#define PyBUF_SIMPLE 0
#define PyBUF_WRITABLE 0x0001
#define PyBUF_FORMAT 0x0004
#define PyBUF_ND 0x0008
#define PyBUF_STRIDES (0x0010 | PyBUF_ND)
#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_FORMAT | PyBUF_WRITABLE)
#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_FORMAT | PyBUF_WRITABLE)
typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
#endif
#if PY_MAJOR_VERSION < 3
#define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
#define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
#define __Pyx_DefaultClassType PyClass_Type
#else
#define __Pyx_BUILTIN_MODULE_NAME "builtins"
#define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
#define __Pyx_DefaultClassType PyType_Type
#endif
#if PY_VERSION_HEX < 0x02060000
#define PyUnicode_FromString(s) PyUnicode_Decode(s, strlen(s), "UTF-8", "strict")
#endif
#if PY_MAJOR_VERSION >= 3
#define Py_TPFLAGS_CHECKTYPES 0
#define Py_TPFLAGS_HAVE_INDEX 0
#endif
#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)
#define Py_TPFLAGS_HAVE_NEWBUFFER 0
#endif
#if PY_VERSION_HEX < 0x02060000
#define Py_TPFLAGS_HAVE_VERSION_TAG 0
#endif
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TPFLAGS_IS_ABSTRACT)
#define Py_TPFLAGS_IS_ABSTRACT 0
#endif
#if PY_VERSION_HEX < 0x030400a1 && !defined(Py_TPFLAGS_HAVE_FINALIZE)
#define Py_TPFLAGS_HAVE_FINALIZE 0
#endif
#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
#define CYTHON_PEP393_ENABLED 1
#define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \
0 : _PyUnicode_Ready((PyObject *)(op)))
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
#define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
#define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
#define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
#else
#define CYTHON_PEP393_ENABLED 0
#define __Pyx_PyUnicode_READY(op) (0)
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
#define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
#define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
#define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
#endif
#if CYTHON_COMPILING_IN_PYPY
#define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
#define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
#else
#define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
#define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \
PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
#endif
#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
#else
#define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
#endif
#if PY_MAJOR_VERSION >= 3
#define PyBaseString_Type PyUnicode_Type
#define PyStringObject PyUnicodeObject
#define PyString_Type PyUnicode_Type
#define PyString_Check PyUnicode_Check
#define PyString_CheckExact PyUnicode_CheckExact
#endif
#if PY_VERSION_HEX < 0x02060000
#define PyBytesObject PyStringObject
#define PyBytes_Type PyString_Type
#define PyBytes_Check PyString_Check
#define PyBytes_CheckExact PyString_CheckExact
#define PyBytes_FromString PyString_FromString
#define PyBytes_FromStringAndSize PyString_FromStringAndSize
#define PyBytes_FromFormat PyString_FromFormat
#define PyBytes_DecodeEscape PyString_DecodeEscape
#define PyBytes_AsString PyString_AsString
#define PyBytes_AsStringAndSize PyString_AsStringAndSize
#define PyBytes_Size PyString_Size
#define PyBytes_AS_STRING PyString_AS_STRING
#define PyBytes_GET_SIZE PyString_GET_SIZE
#define PyBytes_Repr PyString_Repr
#define PyBytes_Concat PyString_Concat
#define PyBytes_ConcatAndDel PyString_ConcatAndDel
#endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
#define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
#else
#define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
PyString_Check(obj) || PyUnicode_Check(obj))
#define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
#endif
#if PY_VERSION_HEX < 0x02060000
#define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type)
#define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type)
#endif
#ifndef PySet_CheckExact
#define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
#endif
#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
#if PY_MAJOR_VERSION >= 3
#define PyIntObject PyLongObject
#define PyInt_Type PyLong_Type
#define PyInt_Check(op) PyLong_Check(op)
#define PyInt_CheckExact(op) PyLong_CheckExact(op)
#define PyInt_FromString PyLong_FromString
#define PyInt_FromUnicode PyLong_FromUnicode
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSize_t PyLong_FromSize_t
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#define PyInt_AS_LONG PyLong_AS_LONG
#define PyInt_AsSsize_t PyLong_AsSsize_t
#define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
#define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
#define PyNumber_Int PyNumber_Long
#endif
#if PY_MAJOR_VERSION >= 3
#define PyBoolObject PyLongObject
#endif
#if PY_VERSION_HEX < 0x030200A4
typedef long Py_hash_t;
#define __Pyx_PyInt_FromHash_t PyInt_FromLong
#define __Pyx_PyInt_AsHash_t PyInt_AsLong
#else
#define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
#define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
#endif
#if (PY_MAJOR_VERSION < 3) || (PY_VERSION_HEX >= 0x03010300)
#define __Pyx_PySequence_GetSlice(obj, a, b) PySequence_GetSlice(obj, a, b)
#define __Pyx_PySequence_SetSlice(obj, a, b, value) PySequence_SetSlice(obj, a, b, value)
#define __Pyx_PySequence_DelSlice(obj, a, b) PySequence_DelSlice(obj, a, b)
#else
#define __Pyx_PySequence_GetSlice(obj, a, b) (unlikely(!(obj)) ? \
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), (PyObject*)0) : \
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_GetSlice(obj, a, b)) : \
(PyErr_Format(PyExc_TypeError, "'%.200s' object is unsliceable", (obj)->ob_type->tp_name), (PyObject*)0)))
#define __Pyx_PySequence_SetSlice(obj, a, b, value) (unlikely(!(obj)) ? \
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_SetSlice(obj, a, b, value)) : \
(PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice assignment", (obj)->ob_type->tp_name), -1)))
#define __Pyx_PySequence_DelSlice(obj, a, b) (unlikely(!(obj)) ? \
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_DelSlice(obj, a, b)) : \
(PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice deletion", (obj)->ob_type->tp_name), -1)))
#endif
#if PY_MAJOR_VERSION >= 3
#define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
#endif
#if PY_VERSION_HEX < 0x02050000
#define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),((char *)(n)))
#define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a))
#define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),((char *)(n)))
#else
#define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),(n))
#define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a))
#define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),(n))
#endif
#if PY_VERSION_HEX < 0x02050000
#define __Pyx_NAMESTR(n) ((char *)(n))
#define __Pyx_DOCSTR(n) ((char *)(n))
#else
#define __Pyx_NAMESTR(n) (n)
#define __Pyx_DOCSTR(n) (n)
#endif
#ifndef CYTHON_INLINE
#if defined(__GNUC__)
#define CYTHON_INLINE __inline__
#elif defined(_MSC_VER)
#define CYTHON_INLINE __inline
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define CYTHON_INLINE inline
#else
#define CYTHON_INLINE
#endif
#endif
#ifndef CYTHON_RESTRICT
#if defined(__GNUC__)
#define CYTHON_RESTRICT __restrict__
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#define CYTHON_RESTRICT __restrict
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define CYTHON_RESTRICT restrict
#else
#define CYTHON_RESTRICT
#endif
#endif
#ifdef NAN
#define __PYX_NAN() ((float) NAN)
#else
static CYTHON_INLINE float __PYX_NAN() {
/* Initialize NaN. The sign is irrelevant, an exponent with all bits 1 and
a nonzero mantissa means NaN. If the first bit in the mantissa is 1, it is
a quiet NaN. */
float value;
memset(&value, 0xFF, sizeof(value));
return value;
}
#endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
#define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
#else
#define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
#define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
#endif
#ifndef __PYX_EXTERN_C
#ifdef __cplusplus
#define __PYX_EXTERN_C extern "C"
#else
#define __PYX_EXTERN_C extern
#endif
#endif
#if defined(WIN32) || defined(MS_WINDOWS)
#define _USE_MATH_DEFINES
#endif
#include
#define __PYX_HAVE__nms__cpu_nms
#define __PYX_HAVE_API__nms__cpu_nms
#include "string.h"
#include "stdio.h"
#include "stdlib.h"
#include "numpy/arrayobject.h"
#include "numpy/ufuncobject.h"
#ifdef _OPENMP
#include
#endif /* _OPENMP */
#ifdef PYREX_WITHOUT_ASSERTIONS
#define CYTHON_WITHOUT_ASSERTIONS
#endif
#ifndef CYTHON_UNUSED
# if defined(__GNUC__)
# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
# define CYTHON_UNUSED __attribute__ ((__unused__))
# else
# define CYTHON_UNUSED
# endif
# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
# define CYTHON_UNUSED __attribute__ ((__unused__))
# else
# define CYTHON_UNUSED
# endif
#endif
typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding;
const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0
#define __PYX_DEFAULT_STRING_ENCODING ""
#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
(sizeof(type) < sizeof(Py_ssize_t)) || \
(sizeof(type) > sizeof(Py_ssize_t) && \
likely(v < (type)PY_SSIZE_T_MAX || \
v == (type)PY_SSIZE_T_MAX) && \
(!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \
v == (type)PY_SSIZE_T_MIN))) || \
(sizeof(type) == sizeof(Py_ssize_t) && \
(is_signed || likely(v < (type)PY_SSIZE_T_MAX || \
v == (type)PY_SSIZE_T_MAX))) )
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
#define __Pyx_PyBytes_FromString PyBytes_FromString
#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
#if PY_MAJOR_VERSION < 3
#define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
#define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
#else
#define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
#define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
#endif
#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((char*)s)
#define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((char*)s)
#define __Pyx_PyByteArray_FromUString(s) __Pyx_PyByteArray_FromString((char*)s)
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
#if PY_MAJOR_VERSION < 3
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
{
const Py_UNICODE *u_end = u;
while (*u_end++) ;
return u_end - u - 1;
}
#else
#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
#endif
#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
#if CYTHON_COMPILING_IN_CPYTHON
#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
#else
#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
#endif
#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
static int __Pyx_sys_getdefaultencoding_not_ascii;
static int __Pyx_init_sys_getdefaultencoding_params(void) {
PyObject* sys = NULL;
PyObject* default_encoding = NULL;
PyObject* ascii_chars_u = NULL;
PyObject* ascii_chars_b = NULL;
sys = PyImport_ImportModule("sys");
if (sys == NULL) goto bad;
default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
if (default_encoding == NULL) goto bad;
if (strcmp(PyBytes_AsString(default_encoding), "ascii") == 0) {
__Pyx_sys_getdefaultencoding_not_ascii = 0;
} else {
const char* default_encoding_c = PyBytes_AS_STRING(default_encoding);
char ascii_chars[128];
int c;
for (c = 0; c < 128; c++) {
ascii_chars[c] = c;
}
__Pyx_sys_getdefaultencoding_not_ascii = 1;
ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
if (ascii_chars_u == NULL) goto bad;
ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
if (ascii_chars_b == NULL || strncmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
PyErr_Format(
PyExc_ValueError,
"This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
default_encoding_c);
goto bad;
}
}
Py_XDECREF(sys);
Py_XDECREF(default_encoding);
Py_XDECREF(ascii_chars_u);
Py_XDECREF(ascii_chars_b);
return 0;
bad:
Py_XDECREF(sys);
Py_XDECREF(default_encoding);
Py_XDECREF(ascii_chars_u);
Py_XDECREF(ascii_chars_b);
return -1;
}
#endif
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
#else
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
static char* __PYX_DEFAULT_STRING_ENCODING;
static int __Pyx_init_sys_getdefaultencoding_params(void) {
PyObject* sys = NULL;
PyObject* default_encoding = NULL;
char* default_encoding_c;
sys = PyImport_ImportModule("sys");
if (sys == NULL) goto bad;
default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
if (default_encoding == NULL) goto bad;
default_encoding_c = PyBytes_AS_STRING(default_encoding);
__PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c));
strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
Py_DECREF(sys);
Py_DECREF(default_encoding);
return 0;
bad:
Py_XDECREF(sys);
Py_XDECREF(default_encoding);
return -1;
}
#endif
#endif
#ifdef __GNUC__
/* Test for GCC > 2.95 */
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else /* __GNUC__ > 2 ... */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ > 2 ... */
#else /* __GNUC__ */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ */
static PyObject *__pyx_m;
static PyObject *__pyx_d;
static PyObject *__pyx_b;
static PyObject *__pyx_empty_tuple;
static PyObject *__pyx_empty_bytes;
static int __pyx_lineno;
static int __pyx_clineno = 0;
static const char * __pyx_cfilenm= __FILE__;
static const char *__pyx_filename;
#if !defined(CYTHON_CCOMPLEX)
#if defined(__cplusplus)
#define CYTHON_CCOMPLEX 1
#elif defined(_Complex_I)
#define CYTHON_CCOMPLEX 1
#else
#define CYTHON_CCOMPLEX 0
#endif
#endif
#if CYTHON_CCOMPLEX
#ifdef __cplusplus
#include
#else
#include
#endif
#endif
#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__)
#undef _Complex_I
#define _Complex_I 1.0fj
#endif
static const char *__pyx_f[] = {
"cpu_nms.pyx",
"__init__.pxd",
"type.pxd",
};
#define IS_UNSIGNED(type) (((type) -1) > 0)
struct __Pyx_StructField_;
#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
typedef struct {
const char* name; /* for error messages only */
struct __Pyx_StructField_* fields;
size_t size; /* sizeof(type) */
size_t arraysize[8]; /* length of array in each dimension */
int ndim;
char typegroup; /* _R_eal, _C_omplex, Signed _I_nt, _U_nsigned int, _S_truct, _P_ointer, _O_bject, c_H_ar */
char is_unsigned;
int flags;
} __Pyx_TypeInfo;
typedef struct __Pyx_StructField_ {
__Pyx_TypeInfo* type;
const char* name;
size_t offset;
} __Pyx_StructField;
typedef struct {
__Pyx_StructField* field;
size_t parent_offset;
} __Pyx_BufFmt_StackElem;
typedef struct {
__Pyx_StructField root;
__Pyx_BufFmt_StackElem* head;
size_t fmt_offset;
size_t new_count, enc_count;
size_t struct_alignment;
int is_complex;
char enc_type;
char new_packmode;
char enc_packmode;
char is_valid_array;
} __Pyx_BufFmt_Context;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":723
* # in Cython to enable them only on the right systems.
*
* ctypedef npy_int8 int8_t # <<<<<<<<<<<<<<
* ctypedef npy_int16 int16_t
* ctypedef npy_int32 int32_t
*/
typedef npy_int8 __pyx_t_5numpy_int8_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":724
*
* ctypedef npy_int8 int8_t
* ctypedef npy_int16 int16_t # <<<<<<<<<<<<<<
* ctypedef npy_int32 int32_t
* ctypedef npy_int64 int64_t
*/
typedef npy_int16 __pyx_t_5numpy_int16_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":725
* ctypedef npy_int8 int8_t
* ctypedef npy_int16 int16_t
* ctypedef npy_int32 int32_t # <<<<<<<<<<<<<<
* ctypedef npy_int64 int64_t
* #ctypedef npy_int96 int96_t
*/
typedef npy_int32 __pyx_t_5numpy_int32_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":726
* ctypedef npy_int16 int16_t
* ctypedef npy_int32 int32_t
* ctypedef npy_int64 int64_t # <<<<<<<<<<<<<<
* #ctypedef npy_int96 int96_t
* #ctypedef npy_int128 int128_t
*/
typedef npy_int64 __pyx_t_5numpy_int64_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":730
* #ctypedef npy_int128 int128_t
*
* ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<<
* ctypedef npy_uint16 uint16_t
* ctypedef npy_uint32 uint32_t
*/
typedef npy_uint8 __pyx_t_5numpy_uint8_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":731
*
* ctypedef npy_uint8 uint8_t
* ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<<
* ctypedef npy_uint32 uint32_t
* ctypedef npy_uint64 uint64_t
*/
typedef npy_uint16 __pyx_t_5numpy_uint16_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":732
* ctypedef npy_uint8 uint8_t
* ctypedef npy_uint16 uint16_t
* ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<<
* ctypedef npy_uint64 uint64_t
* #ctypedef npy_uint96 uint96_t
*/
typedef npy_uint32 __pyx_t_5numpy_uint32_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":733
* ctypedef npy_uint16 uint16_t
* ctypedef npy_uint32 uint32_t
* ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<<
* #ctypedef npy_uint96 uint96_t
* #ctypedef npy_uint128 uint128_t
*/
typedef npy_uint64 __pyx_t_5numpy_uint64_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":737
* #ctypedef npy_uint128 uint128_t
*
* ctypedef npy_float32 float32_t # <<<<<<<<<<<<<<
* ctypedef npy_float64 float64_t
* #ctypedef npy_float80 float80_t
*/
typedef npy_float32 __pyx_t_5numpy_float32_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":738
*
* ctypedef npy_float32 float32_t
* ctypedef npy_float64 float64_t # <<<<<<<<<<<<<<
* #ctypedef npy_float80 float80_t
* #ctypedef npy_float128 float128_t
*/
typedef npy_float64 __pyx_t_5numpy_float64_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":747
* # The int types are mapped a bit surprising --
* # numpy.int corresponds to 'l' and numpy.long to 'q'
* ctypedef npy_long int_t # <<<<<<<<<<<<<<
* ctypedef npy_longlong long_t
* ctypedef npy_longlong longlong_t
*/
typedef npy_long __pyx_t_5numpy_int_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":748
* # numpy.int corresponds to 'l' and numpy.long to 'q'
* ctypedef npy_long int_t
* ctypedef npy_longlong long_t # <<<<<<<<<<<<<<
* ctypedef npy_longlong longlong_t
*
*/
typedef npy_longlong __pyx_t_5numpy_long_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":749
* ctypedef npy_long int_t
* ctypedef npy_longlong long_t
* ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<<
*
* ctypedef npy_ulong uint_t
*/
typedef npy_longlong __pyx_t_5numpy_longlong_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751
* ctypedef npy_longlong longlong_t
*
* ctypedef npy_ulong uint_t # <<<<<<<<<<<<<<
* ctypedef npy_ulonglong ulong_t
* ctypedef npy_ulonglong ulonglong_t
*/
typedef npy_ulong __pyx_t_5numpy_uint_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752
*
* ctypedef npy_ulong uint_t
* ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<<
* ctypedef npy_ulonglong ulonglong_t
*
*/
typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":753
* ctypedef npy_ulong uint_t
* ctypedef npy_ulonglong ulong_t
* ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<<
*
* ctypedef npy_intp intp_t
*/
typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":755
* ctypedef npy_ulonglong ulonglong_t
*
* ctypedef npy_intp intp_t # <<<<<<<<<<<<<<
* ctypedef npy_uintp uintp_t
*
*/
typedef npy_intp __pyx_t_5numpy_intp_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":756
*
* ctypedef npy_intp intp_t
* ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<<
*
* ctypedef npy_double float_t
*/
typedef npy_uintp __pyx_t_5numpy_uintp_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758
* ctypedef npy_uintp uintp_t
*
* ctypedef npy_double float_t # <<<<<<<<<<<<<<
* ctypedef npy_double double_t
* ctypedef npy_longdouble longdouble_t
*/
typedef npy_double __pyx_t_5numpy_float_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":759
*
* ctypedef npy_double float_t
* ctypedef npy_double double_t # <<<<<<<<<<<<<<
* ctypedef npy_longdouble longdouble_t
*
*/
typedef npy_double __pyx_t_5numpy_double_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":760
* ctypedef npy_double float_t
* ctypedef npy_double double_t
* ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<<
*
* ctypedef npy_cfloat cfloat_t
*/
typedef npy_longdouble __pyx_t_5numpy_longdouble_t;
#if CYTHON_CCOMPLEX
#ifdef __cplusplus
typedef ::std::complex< float > __pyx_t_float_complex;
#else
typedef float _Complex __pyx_t_float_complex;
#endif
#else
typedef struct { float real, imag; } __pyx_t_float_complex;
#endif
#if CYTHON_CCOMPLEX
#ifdef __cplusplus
typedef ::std::complex< double > __pyx_t_double_complex;
#else
typedef double _Complex __pyx_t_double_complex;
#endif
#else
typedef struct { double real, imag; } __pyx_t_double_complex;
#endif
/*--- Type declarations ---*/
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":762
* ctypedef npy_longdouble longdouble_t
*
* ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<<
* ctypedef npy_cdouble cdouble_t
* ctypedef npy_clongdouble clongdouble_t
*/
typedef npy_cfloat __pyx_t_5numpy_cfloat_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":763
*
* ctypedef npy_cfloat cfloat_t
* ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<<
* ctypedef npy_clongdouble clongdouble_t
*
*/
typedef npy_cdouble __pyx_t_5numpy_cdouble_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":764
* ctypedef npy_cfloat cfloat_t
* ctypedef npy_cdouble cdouble_t
* ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<<
*
* ctypedef npy_cdouble complex_t
*/
typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":766
* ctypedef npy_clongdouble clongdouble_t
*
* ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<<
*
* cdef inline object PyArray_MultiIterNew1(a):
*/
typedef npy_cdouble __pyx_t_5numpy_complex_t;
#ifndef CYTHON_REFNANNY
#define CYTHON_REFNANNY 0
#endif
#if CYTHON_REFNANNY
typedef struct {
void (*INCREF)(void*, PyObject*, int);
void (*DECREF)(void*, PyObject*, int);
void (*GOTREF)(void*, PyObject*, int);
void (*GIVEREF)(void*, PyObject*, int);
void* (*SetupContext)(const char*, int, const char*);
void (*FinishContext)(void**);
} __Pyx_RefNannyAPIStruct;
static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/
#define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
#ifdef WITH_THREAD
#define __Pyx_RefNannySetupContext(name, acquire_gil) \
if (acquire_gil) { \
PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
PyGILState_Release(__pyx_gilstate_save); \
} else { \
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
}
#else
#define __Pyx_RefNannySetupContext(name, acquire_gil) \
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
#endif
#define __Pyx_RefNannyFinishContext() \
__Pyx_RefNanny->FinishContext(&__pyx_refnanny)
#define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
#define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
#define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
#define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
#define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
#define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
#define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
#define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
#else
#define __Pyx_RefNannyDeclarations
#define __Pyx_RefNannySetupContext(name, acquire_gil)
#define __Pyx_RefNannyFinishContext()
#define __Pyx_INCREF(r) Py_INCREF(r)
#define __Pyx_DECREF(r) Py_DECREF(r)
#define __Pyx_GOTREF(r)
#define __Pyx_GIVEREF(r)
#define __Pyx_XINCREF(r) Py_XINCREF(r)
#define __Pyx_XDECREF(r) Py_XDECREF(r)
#define __Pyx_XGOTREF(r)
#define __Pyx_XGIVEREF(r)
#endif /* CYTHON_REFNANNY */
#define __Pyx_XDECREF_SET(r, v) do { \
PyObject *tmp = (PyObject *) r; \
r = v; __Pyx_XDECREF(tmp); \
} while (0)
#define __Pyx_DECREF_SET(r, v) do { \
PyObject *tmp = (PyObject *) r; \
r = v; __Pyx_DECREF(tmp); \
} while (0)
#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
PyTypeObject* tp = Py_TYPE(obj);
if (likely(tp->tp_getattro))
return tp->tp_getattro(obj, attr_name);
#if PY_MAJOR_VERSION < 3
if (likely(tp->tp_getattr))
return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
#endif
return PyObject_GetAttr(obj, attr_name);
}
#else
#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
#endif
static PyObject *__Pyx_GetBuiltinName(PyObject *name); /*proto*/
static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); /*proto*/
static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); /*proto*/
static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], \
PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, \
const char* function_name); /*proto*/
static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
const char *name, int exact); /*proto*/
static CYTHON_INLINE int __Pyx_GetBufferAndValidate(Py_buffer* buf, PyObject* obj,
__Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack);
static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info);
static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); /*proto*/
#else
#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
#endif
static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); /*proto*/
static void __Pyx_RaiseBufferIndexError(int axis); /*proto*/
#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0)
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) {
PyListObject* L = (PyListObject*) list;
Py_ssize_t len = Py_SIZE(list);
if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) {
Py_INCREF(x);
PyList_SET_ITEM(list, len, x);
Py_SIZE(list) = len+1;
return 0;
}
return PyList_Append(list, x);
}
#else
#define __Pyx_PyList_Append(L,x) PyList_Append(L,x)
#endif
#ifndef __PYX_FORCE_INIT_THREADS
#define __PYX_FORCE_INIT_THREADS 0
#endif
static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); /*proto*/
static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void);
typedef struct {
Py_ssize_t shape, strides, suboffsets;
} __Pyx_Buf_DimInfo;
typedef struct {
size_t refcount;
Py_buffer pybuffer;
} __Pyx_Buffer;
typedef struct {
__Pyx_Buffer *rcbuffer;
char *data;
__Pyx_Buf_DimInfo diminfo[8];
} __Pyx_LocalBuf_ND;
#if PY_MAJOR_VERSION < 3
static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
static void __Pyx_ReleaseBuffer(Py_buffer *view);
#else
#define __Pyx_GetBuffer PyObject_GetBuffer
#define __Pyx_ReleaseBuffer PyBuffer_Release
#endif
static Py_ssize_t __Pyx_zeros[] = {0, 0, 0, 0, 0, 0, 0, 0};
static Py_ssize_t __Pyx_minusones[] = {-1, -1, -1, -1, -1, -1, -1, -1};
static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /*proto*/
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value);
static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
#if CYTHON_CCOMPLEX
#ifdef __cplusplus
#define __Pyx_CREAL(z) ((z).real())
#define __Pyx_CIMAG(z) ((z).imag())
#else
#define __Pyx_CREAL(z) (__real__(z))
#define __Pyx_CIMAG(z) (__imag__(z))
#endif
#else
#define __Pyx_CREAL(z) ((z).real)
#define __Pyx_CIMAG(z) ((z).imag)
#endif
#if (defined(_WIN32) || defined(__clang__)) && defined(__cplusplus) && CYTHON_CCOMPLEX
#define __Pyx_SET_CREAL(z,x) ((z).real(x))
#define __Pyx_SET_CIMAG(z,y) ((z).imag(y))
#else
#define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x)
#define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y)
#endif
static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float);
#if CYTHON_CCOMPLEX
#define __Pyx_c_eqf(a, b) ((a)==(b))
#define __Pyx_c_sumf(a, b) ((a)+(b))
#define __Pyx_c_difff(a, b) ((a)-(b))
#define __Pyx_c_prodf(a, b) ((a)*(b))
#define __Pyx_c_quotf(a, b) ((a)/(b))
#define __Pyx_c_negf(a) (-(a))
#ifdef __cplusplus
#define __Pyx_c_is_zerof(z) ((z)==(float)0)
#define __Pyx_c_conjf(z) (::std::conj(z))
#if 1
#define __Pyx_c_absf(z) (::std::abs(z))
#define __Pyx_c_powf(a, b) (::std::pow(a, b))
#endif
#else
#define __Pyx_c_is_zerof(z) ((z)==0)
#define __Pyx_c_conjf(z) (conjf(z))
#if 1
#define __Pyx_c_absf(z) (cabsf(z))
#define __Pyx_c_powf(a, b) (cpowf(a, b))
#endif
#endif
#else
static CYTHON_INLINE int __Pyx_c_eqf(__pyx_t_float_complex, __pyx_t_float_complex);
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sumf(__pyx_t_float_complex, __pyx_t_float_complex);
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_difff(__pyx_t_float_complex, __pyx_t_float_complex);
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prodf(__pyx_t_float_complex, __pyx_t_float_complex);
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quotf(__pyx_t_float_complex, __pyx_t_float_complex);
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_negf(__pyx_t_float_complex);
static CYTHON_INLINE int __Pyx_c_is_zerof(__pyx_t_float_complex);
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conjf(__pyx_t_float_complex);
#if 1
static CYTHON_INLINE float __Pyx_c_absf(__pyx_t_float_complex);
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_powf(__pyx_t_float_complex, __pyx_t_float_complex);
#endif
#endif
static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double);
#if CYTHON_CCOMPLEX
#define __Pyx_c_eq(a, b) ((a)==(b))
#define __Pyx_c_sum(a, b) ((a)+(b))
#define __Pyx_c_diff(a, b) ((a)-(b))
#define __Pyx_c_prod(a, b) ((a)*(b))
#define __Pyx_c_quot(a, b) ((a)/(b))
#define __Pyx_c_neg(a) (-(a))
#ifdef __cplusplus
#define __Pyx_c_is_zero(z) ((z)==(double)0)
#define __Pyx_c_conj(z) (::std::conj(z))
#if 1
#define __Pyx_c_abs(z) (::std::abs(z))
#define __Pyx_c_pow(a, b) (::std::pow(a, b))
#endif
#else
#define __Pyx_c_is_zero(z) ((z)==0)
#define __Pyx_c_conj(z) (conj(z))
#if 1
#define __Pyx_c_abs(z) (cabs(z))
#define __Pyx_c_pow(a, b) (cpow(a, b))
#endif
#endif
#else
static CYTHON_INLINE int __Pyx_c_eq(__pyx_t_double_complex, __pyx_t_double_complex);
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum(__pyx_t_double_complex, __pyx_t_double_complex);
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff(__pyx_t_double_complex, __pyx_t_double_complex);
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod(__pyx_t_double_complex, __pyx_t_double_complex);
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot(__pyx_t_double_complex, __pyx_t_double_complex);
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg(__pyx_t_double_complex);
static CYTHON_INLINE int __Pyx_c_is_zero(__pyx_t_double_complex);
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj(__pyx_t_double_complex);
#if 1
static CYTHON_INLINE double __Pyx_c_abs(__pyx_t_double_complex);
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow(__pyx_t_double_complex, __pyx_t_double_complex);
#endif
#endif
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
static int __Pyx_check_binary_version(void);
#if !defined(__Pyx_PyIdentifier_FromString)
#if PY_MAJOR_VERSION < 3
#define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
#else
#define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
#endif
#endif
static PyObject *__Pyx_ImportModule(const char *name); /*proto*/
static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, size_t size, int strict); /*proto*/
typedef struct {
int code_line;
PyCodeObject* code_object;
} __Pyx_CodeObjectCacheEntry;
struct __Pyx_CodeObjectCache {
int count;
int max_count;
__Pyx_CodeObjectCacheEntry* entries;
};
static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
static PyCodeObject *__pyx_find_code_object(int code_line);
static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
static void __Pyx_AddTraceback(const char *funcname, int c_line,
int py_line, const char *filename); /*proto*/
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
/* Module declarations from 'cpython.buffer' */
/* Module declarations from 'cpython.ref' */
/* Module declarations from 'libc.string' */
/* Module declarations from 'libc.stdio' */
/* Module declarations from 'cpython.object' */
/* Module declarations from '__builtin__' */
/* Module declarations from 'cpython.type' */
static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0;
/* Module declarations from 'libc.stdlib' */
/* Module declarations from 'numpy' */
/* Module declarations from 'numpy' */
static PyTypeObject *__pyx_ptype_5numpy_dtype = 0;
static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0;
static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0;
static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0;
static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0;
static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, char *, char *, int *); /*proto*/
/* Module declarations from 'nms.cpu_nms' */
static CYTHON_INLINE __pyx_t_5numpy_float32_t __pyx_f_3nms_7cpu_nms_max(__pyx_t_5numpy_float32_t, __pyx_t_5numpy_float32_t); /*proto*/
static CYTHON_INLINE __pyx_t_5numpy_float32_t __pyx_f_3nms_7cpu_nms_min(__pyx_t_5numpy_float32_t, __pyx_t_5numpy_float32_t); /*proto*/
static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t = { "float32_t", NULL, sizeof(__pyx_t_5numpy_float32_t), { 0 }, 0, 'R', 0, 0 };
static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_int_t = { "int_t", NULL, sizeof(__pyx_t_5numpy_int_t), { 0 }, 0, IS_UNSIGNED(__pyx_t_5numpy_int_t) ? 'U' : 'I', IS_UNSIGNED(__pyx_t_5numpy_int_t), 0 };
#define __Pyx_MODULE_NAME "nms.cpu_nms"
int __pyx_module_is_main_nms__cpu_nms = 0;
/* Implementation of 'nms.cpu_nms' */
static PyObject *__pyx_builtin_range;
static PyObject *__pyx_builtin_ValueError;
static PyObject *__pyx_builtin_RuntimeError;
static PyObject *__pyx_pf_3nms_7cpu_nms_cpu_nms(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_dets, PyObject *__pyx_v_thresh); /* proto */
static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */
static char __pyx_k_B[] = "B";
static char __pyx_k_H[] = "H";
static char __pyx_k_I[] = "I";
static char __pyx_k_L[] = "L";
static char __pyx_k_O[] = "O";
static char __pyx_k_Q[] = "Q";
static char __pyx_k_b[] = "b";
static char __pyx_k_d[] = "d";
static char __pyx_k_f[] = "f";
static char __pyx_k_g[] = "g";
static char __pyx_k_h[] = "h";
static char __pyx_k_i[] = "i";
static char __pyx_k_j[] = "_j";
static char __pyx_k_l[] = "l";
static char __pyx_k_q[] = "q";
static char __pyx_k_w[] = "w";
static char __pyx_k_Zd[] = "Zd";
static char __pyx_k_Zf[] = "Zf";
static char __pyx_k_Zg[] = "Zg";
static char __pyx_k_np[] = "np";
static char __pyx_k_x1[] = "x1";
static char __pyx_k_x2[] = "x2";
static char __pyx_k_y1[] = "y1";
static char __pyx_k_y2[] = "y2";
static char __pyx_k_i_2[] = "_i";
static char __pyx_k_int[] = "int";
static char __pyx_k_ix1[] = "ix1";
static char __pyx_k_ix2[] = "ix2";
static char __pyx_k_iy1[] = "iy1";
static char __pyx_k_iy2[] = "iy2";
static char __pyx_k_j_2[] = "j";
static char __pyx_k_ovr[] = "ovr";
static char __pyx_k_xx1[] = "xx1";
static char __pyx_k_xx2[] = "xx2";
static char __pyx_k_yy1[] = "yy1";
static char __pyx_k_yy2[] = "yy2";
static char __pyx_k_dets[] = "dets";
static char __pyx_k_keep[] = "keep";
static char __pyx_k_main[] = "__main__";
static char __pyx_k_test[] = "__test__";
static char __pyx_k_areas[] = "areas";
static char __pyx_k_dtype[] = "dtype";
static char __pyx_k_iarea[] = "iarea";
static char __pyx_k_inter[] = "inter";
static char __pyx_k_ndets[] = "ndets";
static char __pyx_k_numpy[] = "numpy";
static char __pyx_k_order[] = "order";
static char __pyx_k_range[] = "range";
static char __pyx_k_zeros[] = "zeros";
static char __pyx_k_import[] = "__import__";
static char __pyx_k_scores[] = "scores";
static char __pyx_k_thresh[] = "thresh";
static char __pyx_k_argsort[] = "argsort";
static char __pyx_k_cpu_nms[] = "cpu_nms";
static char __pyx_k_ValueError[] = "ValueError";
static char __pyx_k_suppressed[] = "suppressed";
static char __pyx_k_nms_cpu_nms[] = "nms.cpu_nms";
static char __pyx_k_RuntimeError[] = "RuntimeError";
static char __pyx_k_pyx_getbuffer[] = "__pyx_getbuffer";
static char __pyx_k_pyx_releasebuffer[] = "__pyx_releasebuffer";
static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous";
static char __pyx_k_nfs_yoda_xinleic_Inf_Code_Faste[] = "/nfs.yoda/xinleic/Inf/Code/Faster-RCNN_TF/lib/nms/cpu_nms.pyx";
static char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)";
static char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd";
static char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported";
static char __pyx_k_ndarray_is_not_Fortran_contiguou[] = "ndarray is not Fortran contiguous";
static char __pyx_k_Format_string_allocated_too_shor_2[] = "Format string allocated too short.";
static PyObject *__pyx_kp_u_Format_string_allocated_too_shor;
static PyObject *__pyx_kp_u_Format_string_allocated_too_shor_2;
static PyObject *__pyx_kp_u_Non_native_byte_order_not_suppor;
static PyObject *__pyx_n_s_RuntimeError;
static PyObject *__pyx_n_s_ValueError;
static PyObject *__pyx_n_s_areas;
static PyObject *__pyx_n_s_argsort;
static PyObject *__pyx_n_s_cpu_nms;
static PyObject *__pyx_n_s_dets;
static PyObject *__pyx_n_s_dtype;
static PyObject *__pyx_n_s_h;
static PyObject *__pyx_n_s_i;
static PyObject *__pyx_n_s_i_2;
static PyObject *__pyx_n_s_iarea;
static PyObject *__pyx_n_s_import;
static PyObject *__pyx_n_s_int;
static PyObject *__pyx_n_s_inter;
static PyObject *__pyx_n_s_ix1;
static PyObject *__pyx_n_s_ix2;
static PyObject *__pyx_n_s_iy1;
static PyObject *__pyx_n_s_iy2;
static PyObject *__pyx_n_s_j;
static PyObject *__pyx_n_s_j_2;
static PyObject *__pyx_n_s_keep;
static PyObject *__pyx_n_s_main;
static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous;
static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou;
static PyObject *__pyx_n_s_ndets;
static PyObject *__pyx_kp_s_nfs_yoda_xinleic_Inf_Code_Faste;
static PyObject *__pyx_n_s_nms_cpu_nms;
static PyObject *__pyx_n_s_np;
static PyObject *__pyx_n_s_numpy;
static PyObject *__pyx_n_s_order;
static PyObject *__pyx_n_s_ovr;
static PyObject *__pyx_n_s_pyx_getbuffer;
static PyObject *__pyx_n_s_pyx_releasebuffer;
static PyObject *__pyx_n_s_range;
static PyObject *__pyx_n_s_scores;
static PyObject *__pyx_n_s_suppressed;
static PyObject *__pyx_n_s_test;
static PyObject *__pyx_n_s_thresh;
static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd;
static PyObject *__pyx_n_s_w;
static PyObject *__pyx_n_s_x1;
static PyObject *__pyx_n_s_x2;
static PyObject *__pyx_n_s_xx1;
static PyObject *__pyx_n_s_xx2;
static PyObject *__pyx_n_s_y1;
static PyObject *__pyx_n_s_y2;
static PyObject *__pyx_n_s_yy1;
static PyObject *__pyx_n_s_yy2;
static PyObject *__pyx_n_s_zeros;
static PyObject *__pyx_int_0;
static PyObject *__pyx_int_1;
static PyObject *__pyx_int_2;
static PyObject *__pyx_int_3;
static PyObject *__pyx_int_4;
static PyObject *__pyx_int_neg_1;
static PyObject *__pyx_slice_;
static PyObject *__pyx_slice__3;
static PyObject *__pyx_slice__5;
static PyObject *__pyx_slice__7;
static PyObject *__pyx_slice__9;
static PyObject *__pyx_tuple__2;
static PyObject *__pyx_tuple__4;
static PyObject *__pyx_tuple__6;
static PyObject *__pyx_tuple__8;
static PyObject *__pyx_slice__11;
static PyObject *__pyx_tuple__10;
static PyObject *__pyx_tuple__12;
static PyObject *__pyx_tuple__13;
static PyObject *__pyx_tuple__14;
static PyObject *__pyx_tuple__15;
static PyObject *__pyx_tuple__16;
static PyObject *__pyx_tuple__17;
static PyObject *__pyx_tuple__18;
static PyObject *__pyx_codeobj__19;
/* "nms/cpu_nms.pyx":11
* cimport numpy as np
*
* cdef inline np.float32_t max(np.float32_t a, np.float32_t b): # <<<<<<<<<<<<<<
* return a if a >= b else b
*
*/
static CYTHON_INLINE __pyx_t_5numpy_float32_t __pyx_f_3nms_7cpu_nms_max(__pyx_t_5numpy_float32_t __pyx_v_a, __pyx_t_5numpy_float32_t __pyx_v_b) {
__pyx_t_5numpy_float32_t __pyx_r;
__Pyx_RefNannyDeclarations
__pyx_t_5numpy_float32_t __pyx_t_1;
__Pyx_RefNannySetupContext("max", 0);
/* "nms/cpu_nms.pyx":12
*
* cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
* return a if a >= b else b # <<<<<<<<<<<<<<
*
* cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
*/
if (((__pyx_v_a >= __pyx_v_b) != 0)) {
__pyx_t_1 = __pyx_v_a;
} else {
__pyx_t_1 = __pyx_v_b;
}
__pyx_r = __pyx_t_1;
goto __pyx_L0;
/* "nms/cpu_nms.pyx":11
* cimport numpy as np
*
* cdef inline np.float32_t max(np.float32_t a, np.float32_t b): # <<<<<<<<<<<<<<
* return a if a >= b else b
*
*/
/* function exit code */
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "nms/cpu_nms.pyx":14
* return a if a >= b else b
*
* cdef inline np.float32_t min(np.float32_t a, np.float32_t b): # <<<<<<<<<<<<<<
* return a if a <= b else b
*
*/
static CYTHON_INLINE __pyx_t_5numpy_float32_t __pyx_f_3nms_7cpu_nms_min(__pyx_t_5numpy_float32_t __pyx_v_a, __pyx_t_5numpy_float32_t __pyx_v_b) {
__pyx_t_5numpy_float32_t __pyx_r;
__Pyx_RefNannyDeclarations
__pyx_t_5numpy_float32_t __pyx_t_1;
__Pyx_RefNannySetupContext("min", 0);
/* "nms/cpu_nms.pyx":15
*
* cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
* return a if a <= b else b # <<<<<<<<<<<<<<
*
* def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
*/
if (((__pyx_v_a <= __pyx_v_b) != 0)) {
__pyx_t_1 = __pyx_v_a;
} else {
__pyx_t_1 = __pyx_v_b;
}
__pyx_r = __pyx_t_1;
goto __pyx_L0;
/* "nms/cpu_nms.pyx":14
* return a if a >= b else b
*
* cdef inline np.float32_t min(np.float32_t a, np.float32_t b): # <<<<<<<<<<<<<<
* return a if a <= b else b
*
*/
/* function exit code */
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "nms/cpu_nms.pyx":17
* return a if a <= b else b
*
* def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): # <<<<<<<<<<<<<<
* cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
* cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
*/
/* Python wrapper */
static PyObject *__pyx_pw_3nms_7cpu_nms_1cpu_nms(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_3nms_7cpu_nms_1cpu_nms = {__Pyx_NAMESTR("cpu_nms"), (PyCFunction)__pyx_pw_3nms_7cpu_nms_1cpu_nms, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)};
static PyObject *__pyx_pw_3nms_7cpu_nms_1cpu_nms(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
PyArrayObject *__pyx_v_dets = 0;
PyObject *__pyx_v_thresh = 0;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("cpu_nms (wrapper)", 0);
{
static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_dets,&__pyx_n_s_thresh,0};
PyObject* values[2] = {0,0};
if (unlikely(__pyx_kwds)) {
Py_ssize_t kw_args;
const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
switch (pos_args) {
case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
case 0: break;
default: goto __pyx_L5_argtuple_error;
}
kw_args = PyDict_Size(__pyx_kwds);
switch (pos_args) {
case 0:
if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_dets)) != 0)) kw_args--;
else goto __pyx_L5_argtuple_error;
case 1:
if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_thresh)) != 0)) kw_args--;
else {
__Pyx_RaiseArgtupleInvalid("cpu_nms", 1, 2, 2, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
}
}
if (unlikely(kw_args > 0)) {
if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "cpu_nms") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
}
} else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
goto __pyx_L5_argtuple_error;
} else {
values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
}
__pyx_v_dets = ((PyArrayObject *)values[0]);
__pyx_v_thresh = ((PyObject*)values[1]);
}
goto __pyx_L4_argument_unpacking_done;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("cpu_nms", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
__pyx_L3_error:;
__Pyx_AddTraceback("nms.cpu_nms.cpu_nms", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_dets), __pyx_ptype_5numpy_ndarray, 1, "dets", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_thresh), (&PyFloat_Type), 1, "thresh", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_r = __pyx_pf_3nms_7cpu_nms_cpu_nms(__pyx_self, __pyx_v_dets, __pyx_v_thresh);
/* function exit code */
goto __pyx_L0;
__pyx_L1_error:;
__pyx_r = NULL;
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_3nms_7cpu_nms_cpu_nms(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_dets, PyObject *__pyx_v_thresh) {
PyArrayObject *__pyx_v_x1 = 0;
PyArrayObject *__pyx_v_y1 = 0;
PyArrayObject *__pyx_v_x2 = 0;
PyArrayObject *__pyx_v_y2 = 0;
PyArrayObject *__pyx_v_scores = 0;
PyArrayObject *__pyx_v_areas = 0;
PyArrayObject *__pyx_v_order = 0;
int __pyx_v_ndets;
PyArrayObject *__pyx_v_suppressed = 0;
int __pyx_v__i;
int __pyx_v__j;
int __pyx_v_i;
int __pyx_v_j;
__pyx_t_5numpy_float32_t __pyx_v_ix1;
__pyx_t_5numpy_float32_t __pyx_v_iy1;
__pyx_t_5numpy_float32_t __pyx_v_ix2;
__pyx_t_5numpy_float32_t __pyx_v_iy2;
__pyx_t_5numpy_float32_t __pyx_v_iarea;
__pyx_t_5numpy_float32_t __pyx_v_xx1;
__pyx_t_5numpy_float32_t __pyx_v_yy1;
__pyx_t_5numpy_float32_t __pyx_v_xx2;
__pyx_t_5numpy_float32_t __pyx_v_yy2;
__pyx_t_5numpy_float32_t __pyx_v_w;
__pyx_t_5numpy_float32_t __pyx_v_h;
__pyx_t_5numpy_float32_t __pyx_v_inter;
__pyx_t_5numpy_float32_t __pyx_v_ovr;
PyObject *__pyx_v_keep = NULL;
__Pyx_LocalBuf_ND __pyx_pybuffernd_areas;
__Pyx_Buffer __pyx_pybuffer_areas;
__Pyx_LocalBuf_ND __pyx_pybuffernd_dets;
__Pyx_Buffer __pyx_pybuffer_dets;
__Pyx_LocalBuf_ND __pyx_pybuffernd_order;
__Pyx_Buffer __pyx_pybuffer_order;
__Pyx_LocalBuf_ND __pyx_pybuffernd_scores;
__Pyx_Buffer __pyx_pybuffer_scores;
__Pyx_LocalBuf_ND __pyx_pybuffernd_suppressed;
__Pyx_Buffer __pyx_pybuffer_suppressed;
__Pyx_LocalBuf_ND __pyx_pybuffernd_x1;
__Pyx_Buffer __pyx_pybuffer_x1;
__Pyx_LocalBuf_ND __pyx_pybuffernd_x2;
__Pyx_Buffer __pyx_pybuffer_x2;
__Pyx_LocalBuf_ND __pyx_pybuffernd_y1;
__Pyx_Buffer __pyx_pybuffer_y1;
__Pyx_LocalBuf_ND __pyx_pybuffernd_y2;
__Pyx_Buffer __pyx_pybuffer_y2;
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
PyArrayObject *__pyx_t_2 = NULL;
PyArrayObject *__pyx_t_3 = NULL;
PyArrayObject *__pyx_t_4 = NULL;
PyArrayObject *__pyx_t_5 = NULL;
PyArrayObject *__pyx_t_6 = NULL;
PyObject *__pyx_t_7 = NULL;
PyObject *__pyx_t_8 = NULL;
PyArrayObject *__pyx_t_9 = NULL;
PyArrayObject *__pyx_t_10 = NULL;
PyObject *__pyx_t_11 = NULL;
PyObject *__pyx_t_12 = NULL;
PyArrayObject *__pyx_t_13 = NULL;
int __pyx_t_14;
int __pyx_t_15;
int __pyx_t_16;
int __pyx_t_17;
int __pyx_t_18;
int __pyx_t_19;
int __pyx_t_20;
int __pyx_t_21;
int __pyx_t_22;
int __pyx_t_23;
int __pyx_t_24;
int __pyx_t_25;
int __pyx_t_26;
int __pyx_t_27;
int __pyx_t_28;
int __pyx_t_29;
int __pyx_t_30;
int __pyx_t_31;
int __pyx_t_32;
int __pyx_t_33;
int __pyx_t_34;
__pyx_t_5numpy_float32_t __pyx_t_35;
int __pyx_t_36;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("cpu_nms", 0);
__pyx_pybuffer_x1.pybuffer.buf = NULL;
__pyx_pybuffer_x1.refcount = 0;
__pyx_pybuffernd_x1.data = NULL;
__pyx_pybuffernd_x1.rcbuffer = &__pyx_pybuffer_x1;
__pyx_pybuffer_y1.pybuffer.buf = NULL;
__pyx_pybuffer_y1.refcount = 0;
__pyx_pybuffernd_y1.data = NULL;
__pyx_pybuffernd_y1.rcbuffer = &__pyx_pybuffer_y1;
__pyx_pybuffer_x2.pybuffer.buf = NULL;
__pyx_pybuffer_x2.refcount = 0;
__pyx_pybuffernd_x2.data = NULL;
__pyx_pybuffernd_x2.rcbuffer = &__pyx_pybuffer_x2;
__pyx_pybuffer_y2.pybuffer.buf = NULL;
__pyx_pybuffer_y2.refcount = 0;
__pyx_pybuffernd_y2.data = NULL;
__pyx_pybuffernd_y2.rcbuffer = &__pyx_pybuffer_y2;
__pyx_pybuffer_scores.pybuffer.buf = NULL;
__pyx_pybuffer_scores.refcount = 0;
__pyx_pybuffernd_scores.data = NULL;
__pyx_pybuffernd_scores.rcbuffer = &__pyx_pybuffer_scores;
__pyx_pybuffer_areas.pybuffer.buf = NULL;
__pyx_pybuffer_areas.refcount = 0;
__pyx_pybuffernd_areas.data = NULL;
__pyx_pybuffernd_areas.rcbuffer = &__pyx_pybuffer_areas;
__pyx_pybuffer_order.pybuffer.buf = NULL;
__pyx_pybuffer_order.refcount = 0;
__pyx_pybuffernd_order.data = NULL;
__pyx_pybuffernd_order.rcbuffer = &__pyx_pybuffer_order;
__pyx_pybuffer_suppressed.pybuffer.buf = NULL;
__pyx_pybuffer_suppressed.refcount = 0;
__pyx_pybuffernd_suppressed.data = NULL;
__pyx_pybuffernd_suppressed.rcbuffer = &__pyx_pybuffer_suppressed;
__pyx_pybuffer_dets.pybuffer.buf = NULL;
__pyx_pybuffer_dets.refcount = 0;
__pyx_pybuffernd_dets.data = NULL;
__pyx_pybuffernd_dets.rcbuffer = &__pyx_pybuffer_dets;
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets.rcbuffer->pybuffer, (PyObject*)__pyx_v_dets, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_pybuffernd_dets.diminfo[0].strides = __pyx_pybuffernd_dets.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_dets.diminfo[0].shape = __pyx_pybuffernd_dets.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets.diminfo[1].strides = __pyx_pybuffernd_dets.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets.diminfo[1].shape = __pyx_pybuffernd_dets.rcbuffer->pybuffer.shape[1];
/* "nms/cpu_nms.pyx":18
*
* def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
* cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] # <<<<<<<<<<<<<<
* cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
* cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
*/
__pyx_t_1 = PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_tuple__2); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
__Pyx_GOTREF(__pyx_t_1);
if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_2 = ((PyArrayObject *)__pyx_t_1);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_x1.rcbuffer->pybuffer, (PyObject*)__pyx_t_2, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
__pyx_v_x1 = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_x1.rcbuffer->pybuffer.buf = NULL;
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
} else {__pyx_pybuffernd_x1.diminfo[0].strides = __pyx_pybuffernd_x1.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_x1.diminfo[0].shape = __pyx_pybuffernd_x1.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_2 = 0;
__pyx_v_x1 = ((PyArrayObject *)__pyx_t_1);
__pyx_t_1 = 0;
/* "nms/cpu_nms.pyx":19
* def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
* cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
* cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] # <<<<<<<<<<<<<<
* cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
* cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
*/
__pyx_t_1 = PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_tuple__4); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
__Pyx_GOTREF(__pyx_t_1);
if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_3 = ((PyArrayObject *)__pyx_t_1);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_y1.rcbuffer->pybuffer, (PyObject*)__pyx_t_3, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
__pyx_v_y1 = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_y1.rcbuffer->pybuffer.buf = NULL;
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
} else {__pyx_pybuffernd_y1.diminfo[0].strides = __pyx_pybuffernd_y1.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_y1.diminfo[0].shape = __pyx_pybuffernd_y1.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_3 = 0;
__pyx_v_y1 = ((PyArrayObject *)__pyx_t_1);
__pyx_t_1 = 0;
/* "nms/cpu_nms.pyx":20
* cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
* cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
* cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] # <<<<<<<<<<<<<<
* cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
* cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
*/
__pyx_t_1 = PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_tuple__6); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
__Pyx_GOTREF(__pyx_t_1);
if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_4 = ((PyArrayObject *)__pyx_t_1);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_x2.rcbuffer->pybuffer, (PyObject*)__pyx_t_4, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
__pyx_v_x2 = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_x2.rcbuffer->pybuffer.buf = NULL;
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
} else {__pyx_pybuffernd_x2.diminfo[0].strides = __pyx_pybuffernd_x2.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_x2.diminfo[0].shape = __pyx_pybuffernd_x2.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_4 = 0;
__pyx_v_x2 = ((PyArrayObject *)__pyx_t_1);
__pyx_t_1 = 0;
/* "nms/cpu_nms.pyx":21
* cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
* cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
* cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] # <<<<<<<<<<<<<<
* cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
*
*/
__pyx_t_1 = PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_tuple__8); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
__Pyx_GOTREF(__pyx_t_1);
if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_5 = ((PyArrayObject *)__pyx_t_1);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_y2.rcbuffer->pybuffer, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
__pyx_v_y2 = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_y2.rcbuffer->pybuffer.buf = NULL;
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
} else {__pyx_pybuffernd_y2.diminfo[0].strides = __pyx_pybuffernd_y2.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_y2.diminfo[0].shape = __pyx_pybuffernd_y2.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_5 = 0;
__pyx_v_y2 = ((PyArrayObject *)__pyx_t_1);
__pyx_t_1 = 0;
/* "nms/cpu_nms.pyx":22
* cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
* cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
* cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] # <<<<<<<<<<<<<<
*
* cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
*/
__pyx_t_1 = PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_tuple__10); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
__Pyx_GOTREF(__pyx_t_1);
if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_6 = ((PyArrayObject *)__pyx_t_1);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_scores.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
__pyx_v_scores = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_scores.rcbuffer->pybuffer.buf = NULL;
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
} else {__pyx_pybuffernd_scores.diminfo[0].strides = __pyx_pybuffernd_scores.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_scores.diminfo[0].shape = __pyx_pybuffernd_scores.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_6 = 0;
__pyx_v_scores = ((PyArrayObject *)__pyx_t_1);
__pyx_t_1 = 0;
/* "nms/cpu_nms.pyx":24
* cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
*
* cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) # <<<<<<<<<<<<<<
* cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
*
*/
__pyx_t_1 = PyNumber_Subtract(((PyObject *)__pyx_v_x2), ((PyObject *)__pyx_v_x1)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_7 = PyNumber_Add(__pyx_t_1, __pyx_int_1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_7);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_t_1 = PyNumber_Subtract(((PyObject *)__pyx_v_y2), ((PyObject *)__pyx_v_y1)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_int_1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_8);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_t_1 = PyNumber_Multiply(__pyx_t_7, __pyx_t_8); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_9 = ((PyArrayObject *)__pyx_t_1);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_areas.rcbuffer->pybuffer, (PyObject*)__pyx_t_9, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
__pyx_v_areas = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_areas.rcbuffer->pybuffer.buf = NULL;
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
} else {__pyx_pybuffernd_areas.diminfo[0].strides = __pyx_pybuffernd_areas.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_areas.diminfo[0].shape = __pyx_pybuffernd_areas.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_9 = 0;
__pyx_v_areas = ((PyArrayObject *)__pyx_t_1);
__pyx_t_1 = 0;
/* "nms/cpu_nms.pyx":25
*
* cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
* cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] # <<<<<<<<<<<<<<
*
* cdef int ndets = dets.shape[0]
*/
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_scores), __pyx_n_s_argsort); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_8);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_t_1 = PyObject_GetItem(__pyx_t_8, __pyx_slice__11); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
__Pyx_GOTREF(__pyx_t_1);
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_10 = ((PyArrayObject *)__pyx_t_1);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_order.rcbuffer->pybuffer, (PyObject*)__pyx_t_10, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
__pyx_v_order = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_order.rcbuffer->pybuffer.buf = NULL;
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
} else {__pyx_pybuffernd_order.diminfo[0].strides = __pyx_pybuffernd_order.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_order.diminfo[0].shape = __pyx_pybuffernd_order.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_10 = 0;
__pyx_v_order = ((PyArrayObject *)__pyx_t_1);
__pyx_t_1 = 0;
/* "nms/cpu_nms.pyx":27
* cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
*
* cdef int ndets = dets.shape[0] # <<<<<<<<<<<<<<
* cdef np.ndarray[np.int_t, ndim=1] suppressed = \
* np.zeros((ndets), dtype=np.int)
*/
__pyx_v_ndets = (__pyx_v_dets->dimensions[0]);
/* "nms/cpu_nms.pyx":29
* cdef int ndets = dets.shape[0]
* cdef np.ndarray[np.int_t, ndim=1] suppressed = \
* np.zeros((ndets), dtype=np.int) # <<<<<<<<<<<<<<
*
* # nominal indices
*/
__pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_zeros); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_8);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_ndets); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_7);
PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1);
__Pyx_GIVEREF(__pyx_t_1);
__pyx_t_1 = 0;
__pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_11 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_11);
__pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_t_11, __pyx_n_s_int); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_12);
__Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0;
if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_12) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
__pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_7, __pyx_t_1); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_12);
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
if (!(likely(((__pyx_t_12) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_12, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_13 = ((PyArrayObject *)__pyx_t_12);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_suppressed.rcbuffer->pybuffer, (PyObject*)__pyx_t_13, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int_t, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 1, 0, __pyx_stack) == -1)) {
__pyx_v_suppressed = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_suppressed.rcbuffer->pybuffer.buf = NULL;
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
} else {__pyx_pybuffernd_suppressed.diminfo[0].strides = __pyx_pybuffernd_suppressed.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_suppressed.diminfo[0].shape = __pyx_pybuffernd_suppressed.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_13 = 0;
__pyx_v_suppressed = ((PyArrayObject *)__pyx_t_12);
__pyx_t_12 = 0;
/* "nms/cpu_nms.pyx":42
* cdef np.float32_t inter, ovr
*
* keep = [] # <<<<<<<<<<<<<<
* for _i in range(ndets):
* i = order[_i]
*/
__pyx_t_12 = PyList_New(0); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_12);
__pyx_v_keep = ((PyObject*)__pyx_t_12);
__pyx_t_12 = 0;
/* "nms/cpu_nms.pyx":43
*
* keep = []
* for _i in range(ndets): # <<<<<<<<<<<<<<
* i = order[_i]
* if suppressed[i] == 1:
*/
__pyx_t_14 = __pyx_v_ndets;
for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_14; __pyx_t_15+=1) {
__pyx_v__i = __pyx_t_15;
/* "nms/cpu_nms.pyx":44
* keep = []
* for _i in range(ndets):
* i = order[_i] # <<<<<<<<<<<<<<
* if suppressed[i] == 1:
* continue
*/
__pyx_t_16 = __pyx_v__i;
__pyx_t_17 = -1;
if (__pyx_t_16 < 0) {
__pyx_t_16 += __pyx_pybuffernd_order.diminfo[0].shape;
if (unlikely(__pyx_t_16 < 0)) __pyx_t_17 = 0;
} else if (unlikely(__pyx_t_16 >= __pyx_pybuffernd_order.diminfo[0].shape)) __pyx_t_17 = 0;
if (unlikely(__pyx_t_17 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_17);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_i = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int_t *, __pyx_pybuffernd_order.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_order.diminfo[0].strides));
/* "nms/cpu_nms.pyx":45
* for _i in range(ndets):
* i = order[_i]
* if suppressed[i] == 1: # <<<<<<<<<<<<<<
* continue
* keep.append(i)
*/
__pyx_t_17 = __pyx_v_i;
__pyx_t_18 = -1;
if (__pyx_t_17 < 0) {
__pyx_t_17 += __pyx_pybuffernd_suppressed.diminfo[0].shape;
if (unlikely(__pyx_t_17 < 0)) __pyx_t_18 = 0;
} else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_suppressed.diminfo[0].shape)) __pyx_t_18 = 0;
if (unlikely(__pyx_t_18 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_18);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_t_19 = (((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int_t *, __pyx_pybuffernd_suppressed.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_suppressed.diminfo[0].strides)) == 1) != 0);
if (__pyx_t_19) {
/* "nms/cpu_nms.pyx":46
* i = order[_i]
* if suppressed[i] == 1:
* continue # <<<<<<<<<<<<<<
* keep.append(i)
* ix1 = x1[i]
*/
goto __pyx_L3_continue;
}
/* "nms/cpu_nms.pyx":47
* if suppressed[i] == 1:
* continue
* keep.append(i) # <<<<<<<<<<<<<<
* ix1 = x1[i]
* iy1 = y1[i]
*/
__pyx_t_12 = __Pyx_PyInt_From_int(__pyx_v_i); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_12);
__pyx_t_20 = __Pyx_PyList_Append(__pyx_v_keep, __pyx_t_12); if (unlikely(__pyx_t_20 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
/* "nms/cpu_nms.pyx":48
* continue
* keep.append(i)
* ix1 = x1[i] # <<<<<<<<<<<<<<
* iy1 = y1[i]
* ix2 = x2[i]
*/
__pyx_t_18 = __pyx_v_i;
__pyx_t_21 = -1;
if (__pyx_t_18 < 0) {
__pyx_t_18 += __pyx_pybuffernd_x1.diminfo[0].shape;
if (unlikely(__pyx_t_18 < 0)) __pyx_t_21 = 0;
} else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_x1.diminfo[0].shape)) __pyx_t_21 = 0;
if (unlikely(__pyx_t_21 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_21);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_ix1 = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_x1.rcbuffer->pybuffer.buf, __pyx_t_18, __pyx_pybuffernd_x1.diminfo[0].strides));
/* "nms/cpu_nms.pyx":49
* keep.append(i)
* ix1 = x1[i]
* iy1 = y1[i] # <<<<<<<<<<<<<<
* ix2 = x2[i]
* iy2 = y2[i]
*/
__pyx_t_21 = __pyx_v_i;
__pyx_t_22 = -1;
if (__pyx_t_21 < 0) {
__pyx_t_21 += __pyx_pybuffernd_y1.diminfo[0].shape;
if (unlikely(__pyx_t_21 < 0)) __pyx_t_22 = 0;
} else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_y1.diminfo[0].shape)) __pyx_t_22 = 0;
if (unlikely(__pyx_t_22 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_22);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_iy1 = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_y1.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_y1.diminfo[0].strides));
/* "nms/cpu_nms.pyx":50
* ix1 = x1[i]
* iy1 = y1[i]
* ix2 = x2[i] # <<<<<<<<<<<<<<
* iy2 = y2[i]
* iarea = areas[i]
*/
__pyx_t_22 = __pyx_v_i;
__pyx_t_23 = -1;
if (__pyx_t_22 < 0) {
__pyx_t_22 += __pyx_pybuffernd_x2.diminfo[0].shape;
if (unlikely(__pyx_t_22 < 0)) __pyx_t_23 = 0;
} else if (unlikely(__pyx_t_22 >= __pyx_pybuffernd_x2.diminfo[0].shape)) __pyx_t_23 = 0;
if (unlikely(__pyx_t_23 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_23);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_ix2 = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_x2.rcbuffer->pybuffer.buf, __pyx_t_22, __pyx_pybuffernd_x2.diminfo[0].strides));
/* "nms/cpu_nms.pyx":51
* iy1 = y1[i]
* ix2 = x2[i]
* iy2 = y2[i] # <<<<<<<<<<<<<<
* iarea = areas[i]
* for _j in range(_i + 1, ndets):
*/
__pyx_t_23 = __pyx_v_i;
__pyx_t_24 = -1;
if (__pyx_t_23 < 0) {
__pyx_t_23 += __pyx_pybuffernd_y2.diminfo[0].shape;
if (unlikely(__pyx_t_23 < 0)) __pyx_t_24 = 0;
} else if (unlikely(__pyx_t_23 >= __pyx_pybuffernd_y2.diminfo[0].shape)) __pyx_t_24 = 0;
if (unlikely(__pyx_t_24 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_24);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_iy2 = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_y2.rcbuffer->pybuffer.buf, __pyx_t_23, __pyx_pybuffernd_y2.diminfo[0].strides));
/* "nms/cpu_nms.pyx":52
* ix2 = x2[i]
* iy2 = y2[i]
* iarea = areas[i] # <<<<<<<<<<<<<<
* for _j in range(_i + 1, ndets):
* j = order[_j]
*/
__pyx_t_24 = __pyx_v_i;
__pyx_t_25 = -1;
if (__pyx_t_24 < 0) {
__pyx_t_24 += __pyx_pybuffernd_areas.diminfo[0].shape;
if (unlikely(__pyx_t_24 < 0)) __pyx_t_25 = 0;
} else if (unlikely(__pyx_t_24 >= __pyx_pybuffernd_areas.diminfo[0].shape)) __pyx_t_25 = 0;
if (unlikely(__pyx_t_25 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_25);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_iarea = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_areas.rcbuffer->pybuffer.buf, __pyx_t_24, __pyx_pybuffernd_areas.diminfo[0].strides));
/* "nms/cpu_nms.pyx":53
* iy2 = y2[i]
* iarea = areas[i]
* for _j in range(_i + 1, ndets): # <<<<<<<<<<<<<<
* j = order[_j]
* if suppressed[j] == 1:
*/
__pyx_t_25 = __pyx_v_ndets;
for (__pyx_t_26 = (__pyx_v__i + 1); __pyx_t_26 < __pyx_t_25; __pyx_t_26+=1) {
__pyx_v__j = __pyx_t_26;
/* "nms/cpu_nms.pyx":54
* iarea = areas[i]
* for _j in range(_i + 1, ndets):
* j = order[_j] # <<<<<<<<<<<<<<
* if suppressed[j] == 1:
* continue
*/
__pyx_t_27 = __pyx_v__j;
__pyx_t_28 = -1;
if (__pyx_t_27 < 0) {
__pyx_t_27 += __pyx_pybuffernd_order.diminfo[0].shape;
if (unlikely(__pyx_t_27 < 0)) __pyx_t_28 = 0;
} else if (unlikely(__pyx_t_27 >= __pyx_pybuffernd_order.diminfo[0].shape)) __pyx_t_28 = 0;
if (unlikely(__pyx_t_28 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_28);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_j = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int_t *, __pyx_pybuffernd_order.rcbuffer->pybuffer.buf, __pyx_t_27, __pyx_pybuffernd_order.diminfo[0].strides));
/* "nms/cpu_nms.pyx":55
* for _j in range(_i + 1, ndets):
* j = order[_j]
* if suppressed[j] == 1: # <<<<<<<<<<<<<<
* continue
* xx1 = max(ix1, x1[j])
*/
__pyx_t_28 = __pyx_v_j;
__pyx_t_29 = -1;
if (__pyx_t_28 < 0) {
__pyx_t_28 += __pyx_pybuffernd_suppressed.diminfo[0].shape;
if (unlikely(__pyx_t_28 < 0)) __pyx_t_29 = 0;
} else if (unlikely(__pyx_t_28 >= __pyx_pybuffernd_suppressed.diminfo[0].shape)) __pyx_t_29 = 0;
if (unlikely(__pyx_t_29 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_29);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_t_19 = (((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int_t *, __pyx_pybuffernd_suppressed.rcbuffer->pybuffer.buf, __pyx_t_28, __pyx_pybuffernd_suppressed.diminfo[0].strides)) == 1) != 0);
if (__pyx_t_19) {
/* "nms/cpu_nms.pyx":56
* j = order[_j]
* if suppressed[j] == 1:
* continue # <<<<<<<<<<<<<<
* xx1 = max(ix1, x1[j])
* yy1 = max(iy1, y1[j])
*/
goto __pyx_L6_continue;
}
/* "nms/cpu_nms.pyx":57
* if suppressed[j] == 1:
* continue
* xx1 = max(ix1, x1[j]) # <<<<<<<<<<<<<<
* yy1 = max(iy1, y1[j])
* xx2 = min(ix2, x2[j])
*/
__pyx_t_29 = __pyx_v_j;
__pyx_t_30 = -1;
if (__pyx_t_29 < 0) {
__pyx_t_29 += __pyx_pybuffernd_x1.diminfo[0].shape;
if (unlikely(__pyx_t_29 < 0)) __pyx_t_30 = 0;
} else if (unlikely(__pyx_t_29 >= __pyx_pybuffernd_x1.diminfo[0].shape)) __pyx_t_30 = 0;
if (unlikely(__pyx_t_30 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_30);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_xx1 = __pyx_f_3nms_7cpu_nms_max(__pyx_v_ix1, (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_x1.rcbuffer->pybuffer.buf, __pyx_t_29, __pyx_pybuffernd_x1.diminfo[0].strides)));
/* "nms/cpu_nms.pyx":58
* continue
* xx1 = max(ix1, x1[j])
* yy1 = max(iy1, y1[j]) # <<<<<<<<<<<<<<
* xx2 = min(ix2, x2[j])
* yy2 = min(iy2, y2[j])
*/
__pyx_t_30 = __pyx_v_j;
__pyx_t_31 = -1;
if (__pyx_t_30 < 0) {
__pyx_t_30 += __pyx_pybuffernd_y1.diminfo[0].shape;
if (unlikely(__pyx_t_30 < 0)) __pyx_t_31 = 0;
} else if (unlikely(__pyx_t_30 >= __pyx_pybuffernd_y1.diminfo[0].shape)) __pyx_t_31 = 0;
if (unlikely(__pyx_t_31 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_31);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_yy1 = __pyx_f_3nms_7cpu_nms_max(__pyx_v_iy1, (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_y1.rcbuffer->pybuffer.buf, __pyx_t_30, __pyx_pybuffernd_y1.diminfo[0].strides)));
/* "nms/cpu_nms.pyx":59
* xx1 = max(ix1, x1[j])
* yy1 = max(iy1, y1[j])
* xx2 = min(ix2, x2[j]) # <<<<<<<<<<<<<<
* yy2 = min(iy2, y2[j])
* w = max(0.0, xx2 - xx1 + 1)
*/
__pyx_t_31 = __pyx_v_j;
__pyx_t_32 = -1;
if (__pyx_t_31 < 0) {
__pyx_t_31 += __pyx_pybuffernd_x2.diminfo[0].shape;
if (unlikely(__pyx_t_31 < 0)) __pyx_t_32 = 0;
} else if (unlikely(__pyx_t_31 >= __pyx_pybuffernd_x2.diminfo[0].shape)) __pyx_t_32 = 0;
if (unlikely(__pyx_t_32 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_32);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_xx2 = __pyx_f_3nms_7cpu_nms_min(__pyx_v_ix2, (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_x2.rcbuffer->pybuffer.buf, __pyx_t_31, __pyx_pybuffernd_x2.diminfo[0].strides)));
/* "nms/cpu_nms.pyx":60
* yy1 = max(iy1, y1[j])
* xx2 = min(ix2, x2[j])
* yy2 = min(iy2, y2[j]) # <<<<<<<<<<<<<<
* w = max(0.0, xx2 - xx1 + 1)
* h = max(0.0, yy2 - yy1 + 1)
*/
__pyx_t_32 = __pyx_v_j;
__pyx_t_33 = -1;
if (__pyx_t_32 < 0) {
__pyx_t_32 += __pyx_pybuffernd_y2.diminfo[0].shape;
if (unlikely(__pyx_t_32 < 0)) __pyx_t_33 = 0;
} else if (unlikely(__pyx_t_32 >= __pyx_pybuffernd_y2.diminfo[0].shape)) __pyx_t_33 = 0;
if (unlikely(__pyx_t_33 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_33);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_yy2 = __pyx_f_3nms_7cpu_nms_min(__pyx_v_iy2, (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_y2.rcbuffer->pybuffer.buf, __pyx_t_32, __pyx_pybuffernd_y2.diminfo[0].strides)));
/* "nms/cpu_nms.pyx":61
* xx2 = min(ix2, x2[j])
* yy2 = min(iy2, y2[j])
* w = max(0.0, xx2 - xx1 + 1) # <<<<<<<<<<<<<<
* h = max(0.0, yy2 - yy1 + 1)
* inter = w * h
*/
__pyx_v_w = __pyx_f_3nms_7cpu_nms_max(0.0, ((__pyx_v_xx2 - __pyx_v_xx1) + 1.0));
/* "nms/cpu_nms.pyx":62
* yy2 = min(iy2, y2[j])
* w = max(0.0, xx2 - xx1 + 1)
* h = max(0.0, yy2 - yy1 + 1) # <<<<<<<<<<<<<<
* inter = w * h
* ovr = inter / (iarea + areas[j] - inter)
*/
__pyx_v_h = __pyx_f_3nms_7cpu_nms_max(0.0, ((__pyx_v_yy2 - __pyx_v_yy1) + 1.0));
/* "nms/cpu_nms.pyx":63
* w = max(0.0, xx2 - xx1 + 1)
* h = max(0.0, yy2 - yy1 + 1)
* inter = w * h # <<<<<<<<<<<<<<
* ovr = inter / (iarea + areas[j] - inter)
* if ovr >= thresh:
*/
__pyx_v_inter = (__pyx_v_w * __pyx_v_h);
/* "nms/cpu_nms.pyx":64
* h = max(0.0, yy2 - yy1 + 1)
* inter = w * h
* ovr = inter / (iarea + areas[j] - inter) # <<<<<<<<<<<<<<
* if ovr >= thresh:
* suppressed[j] = 1
*/
__pyx_t_33 = __pyx_v_j;
__pyx_t_34 = -1;
if (__pyx_t_33 < 0) {
__pyx_t_33 += __pyx_pybuffernd_areas.diminfo[0].shape;
if (unlikely(__pyx_t_33 < 0)) __pyx_t_34 = 0;
} else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_areas.diminfo[0].shape)) __pyx_t_34 = 0;
if (unlikely(__pyx_t_34 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_34);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_t_35 = ((__pyx_v_iarea + (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_areas.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_areas.diminfo[0].strides))) - __pyx_v_inter);
if (unlikely(__pyx_t_35 == 0)) {
#ifdef WITH_THREAD
PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();
#endif
PyErr_SetString(PyExc_ZeroDivisionError, "float division");
#ifdef WITH_THREAD
PyGILState_Release(__pyx_gilstate_save);
#endif
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_ovr = (__pyx_v_inter / __pyx_t_35);
/* "nms/cpu_nms.pyx":65
* inter = w * h
* ovr = inter / (iarea + areas[j] - inter)
* if ovr >= thresh: # <<<<<<<<<<<<<<
* suppressed[j] = 1
*
*/
__pyx_t_12 = PyFloat_FromDouble(__pyx_v_ovr); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_12);
__pyx_t_1 = PyObject_RichCompare(__pyx_t_12, __pyx_v_thresh, Py_GE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
__pyx_t_19 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_19 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
if (__pyx_t_19) {
/* "nms/cpu_nms.pyx":66
* ovr = inter / (iarea + areas[j] - inter)
* if ovr >= thresh:
* suppressed[j] = 1 # <<<<<<<<<<<<<<
*
* return keep
*/
__pyx_t_34 = __pyx_v_j;
__pyx_t_36 = -1;
if (__pyx_t_34 < 0) {
__pyx_t_34 += __pyx_pybuffernd_suppressed.diminfo[0].shape;
if (unlikely(__pyx_t_34 < 0)) __pyx_t_36 = 0;
} else if (unlikely(__pyx_t_34 >= __pyx_pybuffernd_suppressed.diminfo[0].shape)) __pyx_t_36 = 0;
if (unlikely(__pyx_t_36 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_36);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int_t *, __pyx_pybuffernd_suppressed.rcbuffer->pybuffer.buf, __pyx_t_34, __pyx_pybuffernd_suppressed.diminfo[0].strides) = 1;
goto __pyx_L9;
}
__pyx_L9:;
__pyx_L6_continue:;
}
__pyx_L3_continue:;
}
/* "nms/cpu_nms.pyx":68
* suppressed[j] = 1
*
* return keep # <<<<<<<<<<<<<<
*/
__Pyx_XDECREF(__pyx_r);
__Pyx_INCREF(__pyx_v_keep);
__pyx_r = __pyx_v_keep;
goto __pyx_L0;
/* "nms/cpu_nms.pyx":17
* return a if a <= b else b
*
* def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): # <<<<<<<<<<<<<<
* cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
* cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_7);
__Pyx_XDECREF(__pyx_t_8);
__Pyx_XDECREF(__pyx_t_11);
__Pyx_XDECREF(__pyx_t_12);
{ PyObject *__pyx_type, *__pyx_value, *__pyx_tb;
__Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_areas.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_order.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_scores.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_suppressed.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_x1.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_x2.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_y1.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_y2.rcbuffer->pybuffer);
__Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
__Pyx_AddTraceback("nms.cpu_nms.cpu_nms", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
goto __pyx_L2;
__pyx_L0:;
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_areas.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_order.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_scores.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_suppressed.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_x1.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_x2.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_y1.rcbuffer->pybuffer);
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_y2.rcbuffer->pybuffer);
__pyx_L2:;
__Pyx_XDECREF((PyObject *)__pyx_v_x1);
__Pyx_XDECREF((PyObject *)__pyx_v_y1);
__Pyx_XDECREF((PyObject *)__pyx_v_x2);
__Pyx_XDECREF((PyObject *)__pyx_v_y2);
__Pyx_XDECREF((PyObject *)__pyx_v_scores);
__Pyx_XDECREF((PyObject *)__pyx_v_areas);
__Pyx_XDECREF((PyObject *)__pyx_v_order);
__Pyx_XDECREF((PyObject *)__pyx_v_suppressed);
__Pyx_XDECREF(__pyx_v_keep);
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":194
* # experimental exception made for __getbuffer__ and __releasebuffer__
* # -- the details of this may change.
* def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<<
* # This implementation of getbuffer is geared towards Cython
* # requirements, and does not yet fullfill the PEP.
*/
/* Python wrapper */
static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
int __pyx_r;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0);
__pyx_r = __pyx_pf_5numpy_7ndarray___getbuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags));
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
int __pyx_v_copy_shape;
int __pyx_v_i;
int __pyx_v_ndim;
int __pyx_v_endian_detector;
int __pyx_v_little_endian;
int __pyx_v_t;
char *__pyx_v_f;
PyArray_Descr *__pyx_v_descr = 0;
int __pyx_v_offset;
int __pyx_v_hasfields;
int __pyx_r;
__Pyx_RefNannyDeclarations
int __pyx_t_1;
int __pyx_t_2;
int __pyx_t_3;
PyObject *__pyx_t_4 = NULL;
int __pyx_t_5;
int __pyx_t_6;
int __pyx_t_7;
PyObject *__pyx_t_8 = NULL;
char *__pyx_t_9;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__getbuffer__", 0);
if (__pyx_v_info != NULL) {
__pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None);
__Pyx_GIVEREF(__pyx_v_info->obj);
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":200
* # of flags
*
* if info == NULL: return # <<<<<<<<<<<<<<
*
* cdef int copy_shape, i, ndim
*/
__pyx_t_1 = ((__pyx_v_info == NULL) != 0);
if (__pyx_t_1) {
__pyx_r = 0;
goto __pyx_L0;
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":203
*
* cdef int copy_shape, i, ndim
* cdef int endian_detector = 1 # <<<<<<<<<<<<<<
* cdef bint little_endian = ((&endian_detector)[0] != 0)
*
*/
__pyx_v_endian_detector = 1;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":204
* cdef int copy_shape, i, ndim
* cdef int endian_detector = 1
* cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<<
*
* ndim = PyArray_NDIM(self)
*/
__pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":206
* cdef bint little_endian = ((&endian_detector)[0] != 0)
*
* ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<<
*
* if sizeof(npy_intp) != sizeof(Py_ssize_t):
*/
__pyx_v_ndim = PyArray_NDIM(__pyx_v_self);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":208
* ndim = PyArray_NDIM(self)
*
* if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<<
* copy_shape = 1
* else:
*/
__pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0);
if (__pyx_t_1) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":209
*
* if sizeof(npy_intp) != sizeof(Py_ssize_t):
* copy_shape = 1 # <<<<<<<<<<<<<<
* else:
* copy_shape = 0
*/
__pyx_v_copy_shape = 1;
goto __pyx_L4;
}
/*else*/ {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":211
* copy_shape = 1
* else:
* copy_shape = 0 # <<<<<<<<<<<<<<
*
* if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
*/
__pyx_v_copy_shape = 0;
}
__pyx_L4:;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":213
* copy_shape = 0
*
* if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<<
* and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
* raise ValueError(u"ndarray is not C contiguous")
*/
__pyx_t_1 = (((__pyx_v_flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS) != 0);
if (__pyx_t_1) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214
*
* if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
* and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<<
* raise ValueError(u"ndarray is not C contiguous")
*
*/
__pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_C_CONTIGUOUS) != 0)) != 0);
__pyx_t_3 = __pyx_t_2;
} else {
__pyx_t_3 = __pyx_t_1;
}
if (__pyx_t_3) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":215
* if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
* and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
* raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<<
*
* if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
*/
__pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__12, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 215; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
__Pyx_Raise(__pyx_t_4, 0, 0, 0);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
{__pyx_filename = __pyx_f[1]; __pyx_lineno = 215; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":217
* raise ValueError(u"ndarray is not C contiguous")
*
* if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<<
* and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
* raise ValueError(u"ndarray is not Fortran contiguous")
*/
__pyx_t_3 = (((__pyx_v_flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) != 0);
if (__pyx_t_3) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218
*
* if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
* and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # <<<<<<<<<<<<<<
* raise ValueError(u"ndarray is not Fortran contiguous")
*
*/
__pyx_t_1 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_F_CONTIGUOUS) != 0)) != 0);
__pyx_t_2 = __pyx_t_1;
} else {
__pyx_t_2 = __pyx_t_3;
}
if (__pyx_t_2) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":219
* if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
* and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
* raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<<
*
* info.buf = PyArray_DATA(self)
*/
__pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__13, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
__Pyx_Raise(__pyx_t_4, 0, 0, 0);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
{__pyx_filename = __pyx_f[1]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":221
* raise ValueError(u"ndarray is not Fortran contiguous")
*
* info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<<
* info.ndim = ndim
* if copy_shape:
*/
__pyx_v_info->buf = PyArray_DATA(__pyx_v_self);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222
*
* info.buf = PyArray_DATA(self)
* info.ndim = ndim # <<<<<<<<<<<<<<
* if copy_shape:
* # Allocate new buffer for strides and shape info.
*/
__pyx_v_info->ndim = __pyx_v_ndim;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":223
* info.buf = PyArray_DATA(self)
* info.ndim = ndim
* if copy_shape: # <<<<<<<<<<<<<<
* # Allocate new buffer for strides and shape info.
* # This is allocated as one block, strides first.
*/
__pyx_t_2 = (__pyx_v_copy_shape != 0);
if (__pyx_t_2) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226
* # Allocate new buffer for strides and shape info.
* # This is allocated as one block, strides first.
* info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) # <<<<<<<<<<<<<<
* info.shape = info.strides + ndim
* for i in range(ndim):
*/
__pyx_v_info->strides = ((Py_ssize_t *)malloc((((sizeof(Py_ssize_t)) * ((size_t)__pyx_v_ndim)) * 2)));
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":227
* # This is allocated as one block, strides first.
* info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2)
* info.shape = info.strides + ndim # <<<<<<<<<<<<<<
* for i in range(ndim):
* info.strides[i] = PyArray_STRIDES(self)[i]
*/
__pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":228
* info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2)
* info.shape = info.strides + ndim
* for i in range(ndim): # <<<<<<<<<<<<<<
* info.strides[i] = PyArray_STRIDES(self)[i]
* info.shape[i] = PyArray_DIMS(self)[i]
*/
__pyx_t_5 = __pyx_v_ndim;
for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) {
__pyx_v_i = __pyx_t_6;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229
* info.shape = info.strides + ndim
* for i in range(ndim):
* info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<<
* info.shape[i] = PyArray_DIMS(self)[i]
* else:
*/
(__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":230
* for i in range(ndim):
* info.strides[i] = PyArray_STRIDES(self)[i]
* info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<<
* else:
* info.strides = PyArray_STRIDES(self)
*/
(__pyx_v_info->shape[__pyx_v_i]) = (PyArray_DIMS(__pyx_v_self)[__pyx_v_i]);
}
goto __pyx_L7;
}
/*else*/ {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":232
* info.shape[i] = PyArray_DIMS(self)[i]
* else:
* info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<<
* info.shape = PyArray_DIMS(self)
* info.suboffsets = NULL
*/
__pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self));
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233
* else:
* info.strides = PyArray_STRIDES(self)
* info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<<
* info.suboffsets = NULL
* info.itemsize = PyArray_ITEMSIZE(self)
*/
__pyx_v_info->shape = ((Py_ssize_t *)PyArray_DIMS(__pyx_v_self));
}
__pyx_L7:;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":234
* info.strides = PyArray_STRIDES(self)
* info.shape = PyArray_DIMS(self)
* info.suboffsets = NULL # <<<<<<<<<<<<<<
* info.itemsize = PyArray_ITEMSIZE(self)
* info.readonly = not PyArray_ISWRITEABLE(self)
*/
__pyx_v_info->suboffsets = NULL;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235
* info.shape = PyArray_DIMS(self)
* info.suboffsets = NULL
* info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<<
* info.readonly = not PyArray_ISWRITEABLE(self)
*
*/
__pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":236
* info.suboffsets = NULL
* info.itemsize = PyArray_ITEMSIZE(self)
* info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<<
*
* cdef int t
*/
__pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0));
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239
*
* cdef int t
* cdef char* f = NULL # <<<<<<<<<<<<<<
* cdef dtype descr = self.descr
* cdef list stack
*/
__pyx_v_f = NULL;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":240
* cdef int t
* cdef char* f = NULL
* cdef dtype descr = self.descr # <<<<<<<<<<<<<<
* cdef list stack
* cdef int offset
*/
__pyx_t_4 = ((PyObject *)__pyx_v_self->descr);
__Pyx_INCREF(__pyx_t_4);
__pyx_v_descr = ((PyArray_Descr *)__pyx_t_4);
__pyx_t_4 = 0;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":244
* cdef int offset
*
* cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<<
*
* if not hasfields and not copy_shape:
*/
__pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":246
* cdef bint hasfields = PyDataType_HASFIELDS(descr)
*
* if not hasfields and not copy_shape: # <<<<<<<<<<<<<<
* # do not call releasebuffer
* info.obj = None
*/
__pyx_t_2 = ((!(__pyx_v_hasfields != 0)) != 0);
if (__pyx_t_2) {
__pyx_t_3 = ((!(__pyx_v_copy_shape != 0)) != 0);
__pyx_t_1 = __pyx_t_3;
} else {
__pyx_t_1 = __pyx_t_2;
}
if (__pyx_t_1) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":248
* if not hasfields and not copy_shape:
* # do not call releasebuffer
* info.obj = None # <<<<<<<<<<<<<<
* else:
* # need to call releasebuffer
*/
__Pyx_INCREF(Py_None);
__Pyx_GIVEREF(Py_None);
__Pyx_GOTREF(__pyx_v_info->obj);
__Pyx_DECREF(__pyx_v_info->obj);
__pyx_v_info->obj = Py_None;
goto __pyx_L10;
}
/*else*/ {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":251
* else:
* # need to call releasebuffer
* info.obj = self # <<<<<<<<<<<<<<
*
* if not hasfields:
*/
__Pyx_INCREF(((PyObject *)__pyx_v_self));
__Pyx_GIVEREF(((PyObject *)__pyx_v_self));
__Pyx_GOTREF(__pyx_v_info->obj);
__Pyx_DECREF(__pyx_v_info->obj);
__pyx_v_info->obj = ((PyObject *)__pyx_v_self);
}
__pyx_L10:;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":253
* info.obj = self
*
* if not hasfields: # <<<<<<<<<<<<<<
* t = descr.type_num
* if ((descr.byteorder == c'>' and little_endian) or
*/
__pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0);
if (__pyx_t_1) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254
*
* if not hasfields:
* t = descr.type_num # <<<<<<<<<<<<<<
* if ((descr.byteorder == c'>' and little_endian) or
* (descr.byteorder == c'<' and not little_endian)):
*/
__pyx_t_5 = __pyx_v_descr->type_num;
__pyx_v_t = __pyx_t_5;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":255
* if not hasfields:
* t = descr.type_num
* if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<<
* (descr.byteorder == c'<' and not little_endian)):
* raise ValueError(u"Non-native byte order not supported")
*/
__pyx_t_1 = ((__pyx_v_descr->byteorder == '>') != 0);
if (__pyx_t_1) {
__pyx_t_2 = (__pyx_v_little_endian != 0);
} else {
__pyx_t_2 = __pyx_t_1;
}
if (!__pyx_t_2) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256
* t = descr.type_num
* if ((descr.byteorder == c'>' and little_endian) or
* (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<<
* raise ValueError(u"Non-native byte order not supported")
* if t == NPY_BYTE: f = "b"
*/
__pyx_t_1 = ((__pyx_v_descr->byteorder == '<') != 0);
if (__pyx_t_1) {
__pyx_t_3 = ((!(__pyx_v_little_endian != 0)) != 0);
__pyx_t_7 = __pyx_t_3;
} else {
__pyx_t_7 = __pyx_t_1;
}
__pyx_t_1 = __pyx_t_7;
} else {
__pyx_t_1 = __pyx_t_2;
}
if (__pyx_t_1) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257
* if ((descr.byteorder == c'>' and little_endian) or
* (descr.byteorder == c'<' and not little_endian)):
* raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<<
* if t == NPY_BYTE: f = "b"
* elif t == NPY_UBYTE: f = "B"
*/
__pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__14, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 257; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
__Pyx_Raise(__pyx_t_4, 0, 0, 0);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
{__pyx_filename = __pyx_f[1]; __pyx_lineno = 257; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274
* elif t == NPY_CDOUBLE: f = "Zd"
* elif t == NPY_CLONGDOUBLE: f = "Zg"
* elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<<
* else:
* raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
*/
switch (__pyx_v_t) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":258
* (descr.byteorder == c'<' and not little_endian)):
* raise ValueError(u"Non-native byte order not supported")
* if t == NPY_BYTE: f = "b" # <<<<<<<<<<<<<<
* elif t == NPY_UBYTE: f = "B"
* elif t == NPY_SHORT: f = "h"
*/
case NPY_BYTE:
__pyx_v_f = __pyx_k_b;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259
* raise ValueError(u"Non-native byte order not supported")
* if t == NPY_BYTE: f = "b"
* elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<<
* elif t == NPY_SHORT: f = "h"
* elif t == NPY_USHORT: f = "H"
*/
case NPY_UBYTE:
__pyx_v_f = __pyx_k_B;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260
* if t == NPY_BYTE: f = "b"
* elif t == NPY_UBYTE: f = "B"
* elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<<
* elif t == NPY_USHORT: f = "H"
* elif t == NPY_INT: f = "i"
*/
case NPY_SHORT:
__pyx_v_f = __pyx_k_h;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":261
* elif t == NPY_UBYTE: f = "B"
* elif t == NPY_SHORT: f = "h"
* elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<<
* elif t == NPY_INT: f = "i"
* elif t == NPY_UINT: f = "I"
*/
case NPY_USHORT:
__pyx_v_f = __pyx_k_H;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":262
* elif t == NPY_SHORT: f = "h"
* elif t == NPY_USHORT: f = "H"
* elif t == NPY_INT: f = "i" # <<<<<<<<<<<<<<
* elif t == NPY_UINT: f = "I"
* elif t == NPY_LONG: f = "l"
*/
case NPY_INT:
__pyx_v_f = __pyx_k_i;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263
* elif t == NPY_USHORT: f = "H"
* elif t == NPY_INT: f = "i"
* elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<<
* elif t == NPY_LONG: f = "l"
* elif t == NPY_ULONG: f = "L"
*/
case NPY_UINT:
__pyx_v_f = __pyx_k_I;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":264
* elif t == NPY_INT: f = "i"
* elif t == NPY_UINT: f = "I"
* elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<<
* elif t == NPY_ULONG: f = "L"
* elif t == NPY_LONGLONG: f = "q"
*/
case NPY_LONG:
__pyx_v_f = __pyx_k_l;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265
* elif t == NPY_UINT: f = "I"
* elif t == NPY_LONG: f = "l"
* elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<<
* elif t == NPY_LONGLONG: f = "q"
* elif t == NPY_ULONGLONG: f = "Q"
*/
case NPY_ULONG:
__pyx_v_f = __pyx_k_L;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":266
* elif t == NPY_LONG: f = "l"
* elif t == NPY_ULONG: f = "L"
* elif t == NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<<
* elif t == NPY_ULONGLONG: f = "Q"
* elif t == NPY_FLOAT: f = "f"
*/
case NPY_LONGLONG:
__pyx_v_f = __pyx_k_q;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267
* elif t == NPY_ULONG: f = "L"
* elif t == NPY_LONGLONG: f = "q"
* elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<<
* elif t == NPY_FLOAT: f = "f"
* elif t == NPY_DOUBLE: f = "d"
*/
case NPY_ULONGLONG:
__pyx_v_f = __pyx_k_Q;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":268
* elif t == NPY_LONGLONG: f = "q"
* elif t == NPY_ULONGLONG: f = "Q"
* elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<<
* elif t == NPY_DOUBLE: f = "d"
* elif t == NPY_LONGDOUBLE: f = "g"
*/
case NPY_FLOAT:
__pyx_v_f = __pyx_k_f;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":269
* elif t == NPY_ULONGLONG: f = "Q"
* elif t == NPY_FLOAT: f = "f"
* elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<<
* elif t == NPY_LONGDOUBLE: f = "g"
* elif t == NPY_CFLOAT: f = "Zf"
*/
case NPY_DOUBLE:
__pyx_v_f = __pyx_k_d;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270
* elif t == NPY_FLOAT: f = "f"
* elif t == NPY_DOUBLE: f = "d"
* elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<<
* elif t == NPY_CFLOAT: f = "Zf"
* elif t == NPY_CDOUBLE: f = "Zd"
*/
case NPY_LONGDOUBLE:
__pyx_v_f = __pyx_k_g;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":271
* elif t == NPY_DOUBLE: f = "d"
* elif t == NPY_LONGDOUBLE: f = "g"
* elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<<
* elif t == NPY_CDOUBLE: f = "Zd"
* elif t == NPY_CLONGDOUBLE: f = "Zg"
*/
case NPY_CFLOAT:
__pyx_v_f = __pyx_k_Zf;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272
* elif t == NPY_LONGDOUBLE: f = "g"
* elif t == NPY_CFLOAT: f = "Zf"
* elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<<
* elif t == NPY_CLONGDOUBLE: f = "Zg"
* elif t == NPY_OBJECT: f = "O"
*/
case NPY_CDOUBLE:
__pyx_v_f = __pyx_k_Zd;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273
* elif t == NPY_CFLOAT: f = "Zf"
* elif t == NPY_CDOUBLE: f = "Zd"
* elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<<
* elif t == NPY_OBJECT: f = "O"
* else:
*/
case NPY_CLONGDOUBLE:
__pyx_v_f = __pyx_k_Zg;
break;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274
* elif t == NPY_CDOUBLE: f = "Zd"
* elif t == NPY_CLONGDOUBLE: f = "Zg"
* elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<<
* else:
* raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
*/
case NPY_OBJECT:
__pyx_v_f = __pyx_k_O;
break;
default:
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276
* elif t == NPY_OBJECT: f = "O"
* else:
* raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<<
* info.format = f
* return
*/
__pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_t); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
__pyx_t_8 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_t_4); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_8);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
__pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_8);
__Pyx_GIVEREF(__pyx_t_8);
__pyx_t_8 = 0;
__pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_8);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
__Pyx_Raise(__pyx_t_8, 0, 0, 0);
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
{__pyx_filename = __pyx_f[1]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
break;
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277
* else:
* raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
* info.format = f # <<<<<<<<<<<<<<
* return
* else:
*/
__pyx_v_info->format = __pyx_v_f;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":278
* raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
* info.format = f
* return # <<<<<<<<<<<<<<
* else:
* info.format = stdlib.malloc(_buffer_format_string_len)
*/
__pyx_r = 0;
goto __pyx_L0;
}
/*else*/ {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280
* return
* else:
* info.format = stdlib.malloc(_buffer_format_string_len) # <<<<<<<<<<<<<<
* info.format[0] = c'^' # Native data types, manual alignment
* offset = 0
*/
__pyx_v_info->format = ((char *)malloc(255));
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281
* else:
* info.format = stdlib.malloc(_buffer_format_string_len)
* info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<<
* offset = 0
* f = _util_dtypestring(descr, info.format + 1,
*/
(__pyx_v_info->format[0]) = '^';
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":282
* info.format = stdlib.malloc(_buffer_format_string_len)
* info.format[0] = c'^' # Native data types, manual alignment
* offset = 0 # <<<<<<<<<<<<<<
* f = _util_dtypestring(descr, info.format + 1,
* info.format + _buffer_format_string_len,
*/
__pyx_v_offset = 0;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283
* info.format[0] = c'^' # Native data types, manual alignment
* offset = 0
* f = _util_dtypestring(descr, info.format + 1, # <<<<<<<<<<<<<<
* info.format + _buffer_format_string_len,
* &offset)
*/
__pyx_t_9 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 255), (&__pyx_v_offset)); if (unlikely(__pyx_t_9 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_f = __pyx_t_9;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286
* info.format + _buffer_format_string_len,
* &offset)
* f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<<
*
* def __releasebuffer__(ndarray self, Py_buffer* info):
*/
(__pyx_v_f[0]) = '\x00';
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":194
* # experimental exception made for __getbuffer__ and __releasebuffer__
* # -- the details of this may change.
* def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<<
* # This implementation of getbuffer is geared towards Cython
* # requirements, and does not yet fullfill the PEP.
*/
/* function exit code */
__pyx_r = 0;
goto __pyx_L0;
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_4);
__Pyx_XDECREF(__pyx_t_8);
__Pyx_AddTraceback("numpy.ndarray.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = -1;
if (__pyx_v_info != NULL && __pyx_v_info->obj != NULL) {
__Pyx_GOTREF(__pyx_v_info->obj);
__Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = NULL;
}
goto __pyx_L2;
__pyx_L0:;
if (__pyx_v_info != NULL && __pyx_v_info->obj == Py_None) {
__Pyx_GOTREF(Py_None);
__Pyx_DECREF(Py_None); __pyx_v_info->obj = NULL;
}
__pyx_L2:;
__Pyx_XDECREF((PyObject *)__pyx_v_descr);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":288
* f[0] = c'\0' # Terminate format string
*
* def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<<
* if PyArray_HASFIELDS(self):
* stdlib.free(info.format)
*/
/* Python wrapper */
static CYTHON_UNUSED void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info); /*proto*/
static CYTHON_UNUSED void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info) {
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__releasebuffer__ (wrapper)", 0);
__pyx_pf_5numpy_7ndarray_2__releasebuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info));
/* function exit code */
__Pyx_RefNannyFinishContext();
}
static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info) {
__Pyx_RefNannyDeclarations
int __pyx_t_1;
__Pyx_RefNannySetupContext("__releasebuffer__", 0);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289
*
* def __releasebuffer__(ndarray self, Py_buffer* info):
* if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<<
* stdlib.free(info.format)
* if sizeof(npy_intp) != sizeof(Py_ssize_t):
*/
__pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0);
if (__pyx_t_1) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":290
* def __releasebuffer__(ndarray self, Py_buffer* info):
* if PyArray_HASFIELDS(self):
* stdlib.free(info.format) # <<<<<<<<<<<<<<
* if sizeof(npy_intp) != sizeof(Py_ssize_t):
* stdlib.free(info.strides)
*/
free(__pyx_v_info->format);
goto __pyx_L3;
}
__pyx_L3:;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291
* if PyArray_HASFIELDS(self):
* stdlib.free(info.format)
* if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<<
* stdlib.free(info.strides)
* # info.shape was stored after info.strides in the same block
*/
__pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0);
if (__pyx_t_1) {
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292
* stdlib.free(info.format)
* if sizeof(npy_intp) != sizeof(Py_ssize_t):
* stdlib.free(info.strides) # <<<<<<<<<<<<<<
* # info.shape was stored after info.strides in the same block
*
*/
free(__pyx_v_info->strides);
goto __pyx_L4;
}
__pyx_L4:;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":288
* f[0] = c'\0' # Terminate format string
*
* def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<<
* if PyArray_HASFIELDS(self):
* stdlib.free(info.format)
*/
/* function exit code */
__Pyx_RefNannyFinishContext();
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":768
* ctypedef npy_cdouble complex_t
*
* cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
* return PyArray_MultiIterNew(1, a)
*
*/
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769
*
* cdef inline object PyArray_MultiIterNew1(a):
* return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<<
*
* cdef inline object PyArray_MultiIterNew2(a, b):
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":768
* ctypedef npy_cdouble complex_t
*
* cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
* return PyArray_MultiIterNew(1, a)
*
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771
* return PyArray_MultiIterNew(1, a)
*
* cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
* return PyArray_MultiIterNew(2, a, b)
*
*/
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772
*
* cdef inline object PyArray_MultiIterNew2(a, b):
* return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<<
*
* cdef inline object PyArray_MultiIterNew3(a, b, c):
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771
* return PyArray_MultiIterNew(1, a)
*
* cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
* return PyArray_MultiIterNew(2, a, b)
*
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774
* return PyArray_MultiIterNew(2, a, b)
*
* cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
* return PyArray_MultiIterNew(3, a, b, c)
*
*/
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0);
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775
*
* cdef inline object PyArray_MultiIterNew3(a, b, c):
* return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<<
*
* cdef inline object PyArray_MultiIterNew4(a, b, c, d):
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 775; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774
* return PyArray_MultiIterNew(2, a, b)
*
* cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
* return PyArray_MultiIterNew(3, a, b, c)
*
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "/home/xinleic/anaconda/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777
* return PyArray_MultiIterNew(3, a, b, c)
*
* cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
* return PyArray_MultiIterNew(4, a, b, | |